diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py
index ddf9bda3492de..a022c3c4dd2a7 100644
--- a/.github/scripts/label_title_regex.py
+++ b/.github/scripts/label_title_regex.py
@@ -1,10 +1,11 @@
 """Labels PRs based on title. Must be run in a github action with the
 pull_request_target event."""
-from github import Github
-import os
 import json
+import os
 import re
+
+from github import Github
+
 context_dict = json.loads(os.getenv("CONTEXT_GITHUB"))
 
 repo = context_dict["repository"]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 643c2141819d5..abffbbe149f2c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,6 +14,7 @@ repos:
   rev: v0.0.272
   hooks:
   - id: ruff
+    args: ["--fix", "--show-source"]
 - repo: https://github.com/pre-commit/mirrors-mypy
   rev: v1.3.0
   hooks:
diff --git a/asv_benchmarks/benchmarks/cluster.py b/asv_benchmarks/benchmarks/cluster.py
index ba460e6b503a6..457a15dd938e9 100644
--- a/asv_benchmarks/benchmarks/cluster.py
+++ b/asv_benchmarks/benchmarks/cluster.py
@@ -1,7 +1,7 @@
 from sklearn.cluster import KMeans, MiniBatchKMeans
 
 from .common import Benchmark, Estimator, Predictor, Transformer
-from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset
+from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset
 from .utils import neg_mean_inertia
diff --git a/asv_benchmarks/benchmarks/common.py b/asv_benchmarks/benchmarks/common.py
index c3e114a212047..aeea558844587 100644
--- a/asv_benchmarks/benchmarks/common.py
+++ b/asv_benchmarks/benchmarks/common.py
@@ -1,11 +1,11 @@
-import os
+import itertools
 import json
-import timeit
+import os
 import pickle
-import itertools
+import timeit
 from abc import ABC, abstractmethod
-from pathlib import Path
 from multiprocessing import cpu_count
+from pathlib import Path
 
 import numpy as np
diff --git a/asv_benchmarks/benchmarks/datasets.py b/asv_benchmarks/benchmarks/datasets.py
index dbe0eac0b822c..8f0c915c95e63 100644
--- a/asv_benchmarks/benchmarks/datasets.py
+++ b/asv_benchmarks/benchmarks/datasets.py
@@ -1,21 +1,22 @@
+from pathlib import Path
+
 import numpy as np
 import scipy.sparse as sp
 from joblib import Memory
-from pathlib import Path
-from sklearn.decomposition import TruncatedSVD
 from sklearn.datasets import (
-    make_blobs,
     fetch_20newsgroups,
+    fetch_olivetti_faces,
     fetch_openml,
     load_digits,
-    make_regression,
+    make_blobs,
     make_classification,
-    fetch_olivetti_faces,
+    make_regression,
 )
-from sklearn.preprocessing import MaxAbsScaler, StandardScaler
+from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MaxAbsScaler, StandardScaler
 
 # memory location for caching datasets
 M = Memory(location=str(Path(__file__).resolve().parent / "cache"))
diff --git a/asv_benchmarks/benchmarks/decomposition.py b/asv_benchmarks/benchmarks/decomposition.py
index 02a7862caeb69..0a7bb7ad07f3e 100644
--- a/asv_benchmarks/benchmarks/decomposition.py
+++ b/asv_benchmarks/benchmarks/decomposition.py
@@ -1,8 +1,8 @@
 from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning
 
 from .common import Benchmark, Estimator, Transformer
-from .datasets import _olivetti_faces_dataset, _mnist_dataset
-from .utils import make_pca_scorers, make_dict_learning_scorers
+from .datasets import _mnist_dataset, _olivetti_faces_dataset
+from .utils import make_dict_learning_scorers, make_pca_scorers
 
 
 class PCABenchmark(Transformer, Estimator, Benchmark):
diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
index 8c5a28e3da90f..c336d1e5f8805 100644
--- a/asv_benchmarks/benchmarks/ensemble.py
+++ b/asv_benchmarks/benchmarks/ensemble.py
@@ -1,7 +1,7 @@
 from sklearn.ensemble import (
-    RandomForestClassifier,
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
+    RandomForestClassifier,
 )
 
 from .common import Benchmark, Estimator, Predictor
diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py
index b694a109329f0..7e7b9d33540c6 100644
--- a/asv_benchmarks/benchmarks/linear_model.py
+++ b/asv_benchmarks/benchmarks/linear_model.py
@@ -1,9 +1,9 @@
 from sklearn.linear_model import (
-    LogisticRegression,
-    Ridge,
     ElasticNet,
     Lasso,
     LinearRegression,
+    LogisticRegression,
+    Ridge,
     SGDRegressor,
 )
diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py
index c542349839178..a559bc59b5f8a 100644
--- a/benchmarks/bench_20newsgroups.py
+++ b/benchmarks/bench_20newsgroups.py
@@ -1,18 +1,19 @@
-from time import time
 import argparse
-import numpy as np
+from time import time
 
-from sklearn.dummy import DummyClassifier
+import numpy as np
 from sklearn.datasets import fetch_20newsgroups_vectorized
-from sklearn.metrics import accuracy_score
-from sklearn.utils.validation import check_array
-
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import AdaBoostClassifier
+from sklearn.dummy import DummyClassifier
+from sklearn.ensemble import (
+    AdaBoostClassifier,
+    ExtraTreesClassifier,
+    RandomForestClassifier,
+)
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
 from sklearn.naive_bayes import MultinomialNB
+from sklearn.utils.validation import check_array
 
 ESTIMATORS = {
     "dummy": DummyClassifier(),
diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py
index 8a13a2d9806c6..5b8cdd588c8ee 100644
--- a/benchmarks/bench_covertype.py
+++ b/benchmarks/bench_covertype.py
@@ -45,20 +45,24 @@
 # Arnaud Joly
 # License: BSD 3 clause
 
+import argparse
 import os
 from time import time
-import argparse
+
 import numpy as np
 from joblib import Memory
 
 from sklearn.datasets import fetch_covtype, get_data_home
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import SGDClassifier, LogisticRegression
+from sklearn.ensemble import (
+    ExtraTreesClassifier,
+    GradientBoostingClassifier,
+    RandomForestClassifier,
+)
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.metrics import zero_one_loss
 from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import LinearSVC
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-from sklearn.metrics import zero_one_loss
 from sklearn.utils import check_array
 
 # Memoize the data extraction and memory map the resulting
diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py
index fd5a4f0ebccff..b9d9efbdea4f1 100644
--- a/benchmarks/bench_feature_expansions.py
+++ b/benchmarks/bench_feature_expansions.py
@@ -1,8 +1,10 @@
+from time import time
+
 import matplotlib.pyplot as plt
 import numpy as np
 import scipy.sparse as sparse
+
 from sklearn.preprocessing import PolynomialFeatures
-from time import time
 
 degree = 2
 trials = 3
diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py
index c6c2a6f5fa117..803043398d1ac 100644
--- a/benchmarks/bench_glm.py
+++ b/benchmarks/bench_glm.py
@@ -5,9 +5,10 @@
 """
 from datetime import datetime
+
 import numpy as np
-from sklearn import linear_model
 
+from sklearn import linear_model
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py
index 8a0a0545bb627..7b111f95044e2 100644
--- a/benchmarks/bench_glmnet.py
+++ b/benchmarks/bench_glmnet.py
@@ -16,9 +16,11 @@
 In both cases, only 10% of the features are informative.
 """
-import numpy as np
 import gc
 from time import time
+
+import numpy as np
+
 from sklearn.datasets import make_regression
 
 alpha = 0.1
@@ -45,11 +47,11 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
 
 if __name__ == "__main__":
-    from glmnet.elastic_net import Lasso as GlmnetLasso
-    from sklearn.linear_model import Lasso as ScikitLasso
-    # Delayed import of matplotlib.pyplot
     import matplotlib.pyplot as plt
+    from glmnet.elastic_net import Lasso as GlmnetLasso
+
+    from sklearn.linear_model import Lasso as ScikitLasso
 
     scikit_results = []
     glmnet_results = []
diff --git a/benchmarks/bench_hist_gradient_boosting.py b/benchmarks/bench_hist_gradient_boosting.py
index 163e21f98ed0d..c1dfffabe71c2 100644
--- a/benchmarks/bench_hist_gradient_boosting.py
+++ b/benchmarks/bench_hist_gradient_boosting.py
@@ -1,15 +1,16 @@
-from time import time
 import argparse
+from time import time
 
 import matplotlib.pyplot as plt
 import numpy as np
 
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_regression
-from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+    HistGradientBoostingClassifier,
+    HistGradientBoostingRegressor,
+)
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.model_selection import train_test_split
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py
index 1b5905b1cf4e8..5fa5bbae0c35c 100644
--- a/benchmarks/bench_hist_gradient_boosting_adult.py
+++ b/benchmarks/bench_hist_gradient_boosting_adult.py
@@ -4,15 +4,14 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.model_selection import train_test_split
-from sklearn.compose import make_column_transformer, make_column_selector
+from sklearn.compose import make_column_selector, make_column_transformer
 from sklearn.datasets import fetch_openml
-from sklearn.metrics import accuracy_score, roc_auc_score
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import OrdinalEncoder
-
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
 parser.add_argument("--n-trees", type=int, default=100)
diff --git a/benchmarks/bench_hist_gradient_boosting_categorical_only.py b/benchmarks/bench_hist_gradient_boosting_categorical_only.py
index e8d215170f9c8..1085bbc49f4f8 100644
--- a/benchmarks/bench_hist_gradient_boosting_categorical_only.py
+++ b/benchmarks/bench_hist_gradient_boosting_categorical_only.py
@@ -1,11 +1,10 @@
 import argparse
 from time import time
 
-from sklearn.preprocessing import KBinsDiscretizer
 from sklearn.datasets import make_classification
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.preprocessing import KBinsDiscretizer
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py
index d6ed3b8e9700f..65be02ec0c4b9 100644
--- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py
+++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py
@@ -1,17 +1,17 @@
-from urllib.request import urlretrieve
+import argparse
 import os
 from gzip import GzipFile
 from time import time
-import argparse
+from urllib.request import urlretrieve
 
 import numpy as np
 import pandas as pd
 from joblib import Memory
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score, roc_auc_score
+
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
diff --git a/benchmarks/bench_hist_gradient_boosting_threading.py b/benchmarks/bench_hist_gradient_boosting_threading.py
index 70787fd2eb479..9acf65bdbaf6a 100644
--- a/benchmarks/bench_hist_gradient_boosting_threading.py
+++ b/benchmarks/bench_hist_gradient_boosting_threading.py
@@ -1,18 +1,19 @@
-from time import time
 import argparse
 import os
 from pprint import pprint
+from time import time
 
 import numpy as np
 from threadpoolctl import threadpool_limits
+
 import sklearn
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+    HistGradientBoostingClassifier,
+    HistGradientBoostingRegressor,
+)
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.model_selection import train_test_split
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
@@ -290,8 +291,8 @@ def one_run(n_threads, n_samples):
 
 if args.plot or args.plot_filename:
-    import matplotlib.pyplot as plt
     import matplotlib
+    import matplotlib.pyplot as plt
 
     fig, axs = plt.subplots(2, figsize=(12, 12))
diff --git a/benchmarks/bench_isolation_forest.py b/benchmarks/bench_isolation_forest.py
index 1c85cfb79d321..021114af56ea6 100644
--- a/benchmarks/bench_isolation_forest.py
+++ b/benchmarks/bench_isolation_forest.py
@@ -17,12 +17,13 @@
 """
 
 from time import time
-import numpy as np
+
 import matplotlib.pyplot as plt
+import numpy as np
 
+from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml
 from sklearn.ensemble import IsolationForest
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
+from sklearn.metrics import auc, roc_curve
 from sklearn.preprocessing import LabelBinarizer
 from sklearn.utils import shuffle as sh
diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py
index 458a04a463303..221e6fb12da75 100644
--- a/benchmarks/bench_isotonic.py
+++ b/benchmarks/bench_isotonic.py
@@ -10,13 +10,15 @@
 This allows the scaling of the algorithm with the problem size to be
 visualized and understood.
 """
-import numpy as np
+import argparse
 import gc
 from datetime import datetime
-from sklearn.isotonic import isotonic_regression
-from scipy.special import expit
+
 import matplotlib.pyplot as plt
-import argparse
+import numpy as np
+from scipy.special import expit
+
+from sklearn.isotonic import isotonic_regression
 
 
 def generate_perturbed_logarithm_dataset(size):
diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
index 00721aa7f18a9..6551cb74ff86e 100644
--- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
+++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
@@ -39,13 +39,12 @@
 
 import time
 
-import numpy as np
 import matplotlib.pyplot as plt
-
+import numpy as np
 from numpy.testing import assert_array_almost_equal
-from sklearn.decomposition import KernelPCA
-from sklearn.datasets import make_circles
 
+from sklearn.datasets import make_circles
+from sklearn.decomposition import KernelPCA
 
 print(__doc__)
diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
index a40ddea4506dd..26a45ca9f09ca 100644
--- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
+++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
@@ -41,13 +41,12 @@
 
 import time
 
-import numpy as np
 import matplotlib.pyplot as plt
-
+import numpy as np
 from numpy.testing import assert_array_almost_equal
-from sklearn.decomposition import KernelPCA
-from sklearn.datasets import make_circles
 
+from sklearn.datasets import make_circles
+from sklearn.decomposition import KernelPCA
 
 print(__doc__)
diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py
index 9a893545fbb28..1c49c6f5cabdf 100644
--- a/benchmarks/bench_lasso.py
+++ b/benchmarks/bench_lasso.py
@@ -13,6 +13,7 @@
 """
 import gc
 from time import time
+
 import numpy as np
 
 from sklearn.datasets import make_regression
@@ -59,9 +60,10 @@ def compute_bench(alpha, n_samples, n_features, precompute):
 
 if __name__ == "__main__":
-    from sklearn.linear_model import Lasso, LassoLars
     import matplotlib.pyplot as plt
 
+    from sklearn.linear_model import Lasso, LassoLars
+
     alpha = 0.01  # regularization parameter
 
     n_features = 10
diff --git a/benchmarks/bench_lof.py b/benchmarks/bench_lof.py
index 31057e2e4067b..8652073a7203d 100644
--- a/benchmarks/bench_lof.py
+++ b/benchmarks/bench_lof.py
@@ -18,11 +18,13 @@
 """
 
 from time import time
-import numpy as np
+
 import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml
+from sklearn.metrics import auc, roc_curve
 from sklearn.neighbors import LocalOutlierFactor
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
 from sklearn.preprocessing import LabelBinarizer
 
 print(__doc__)
diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py
index 4bc28ea1a165d..4ba17cb1003c3 100644
--- a/benchmarks/bench_mnist.py
+++ b/benchmarks/bench_mnist.py
@@ -30,26 +30,24 @@
 # Arnaud Joly
 # License: BSD 3 clause
 
+import argparse
 import os
 from time import time
-import argparse
+
 import numpy as np
 from joblib import Memory
 
-from sklearn.datasets import fetch_openml
-from sklearn.datasets import get_data_home
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import fetch_openml, get_data_home
 from sklearn.dummy import DummyClassifier
-from sklearn.kernel_approximation import Nystroem
-from sklearn.kernel_approximation import RBFSampler
+from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
+from sklearn.kernel_approximation import Nystroem, RBFSampler
+from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import zero_one_loss
+from sklearn.neural_network import MLPClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.svm import LinearSVC
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.utils import check_array
-from sklearn.linear_model import LogisticRegression
-from sklearn.neural_network import MLPClassifier
 
 # Memoize the data extraction and memory map the resulting
 # train / test splits in readonly mode
diff --git a/benchmarks/bench_multilabel_metrics.py b/benchmarks/bench_multilabel_metrics.py
index 2a87b388e91a2..1b8449a24da51 100755
--- a/benchmarks/bench_multilabel_metrics.py
+++ b/benchmarks/bench_multilabel_metrics.py
@@ -3,26 +3,25 @@
 A comparison of multilabel target formats and metrics over them
 """
 
-from timeit import timeit
-from functools import partial
-import itertools
 import argparse
+import itertools
 import sys
+from functools import partial
+from timeit import timeit
 
 import matplotlib.pyplot as plt
-import scipy.sparse as sp
 import numpy as np
+import scipy.sparse as sp
 
 from sklearn.datasets import make_multilabel_classification
 from sklearn.metrics import (
-    f1_score,
     accuracy_score,
+    f1_score,
     hamming_loss,
     jaccard_similarity_score,
 )
 from sklearn.utils._testing import ignore_warnings
-
 
 METRICS = {
     "f1": partial(f1_score, average="micro"),
     "f1-by-sample": partial(f1_score, average="samples"),
diff --git a/benchmarks/bench_online_ocsvm.py b/benchmarks/bench_online_ocsvm.py
index 37af2fdd76562..9f92150e079dd 100644
--- a/benchmarks/bench_online_ocsvm.py
+++ b/benchmarks/bench_online_ocsvm.py
@@ -15,21 +15,20 @@
 """
 from time import time
 
-import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
 from scipy.interpolate import interp1d
 
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype
-from sklearn.preprocessing import LabelBinarizer, StandardScaler
-from sklearn.pipeline import make_pipeline
-from sklearn.utils import shuffle
+from sklearn.datasets import fetch_covtype, fetch_kddcup99
 from sklearn.kernel_approximation import Nystroem
-from sklearn.svm import OneClassSVM
 from sklearn.linear_model import SGDOneClassSVM
-
-import matplotlib.pyplot as plt
-import matplotlib
+from sklearn.metrics import auc, roc_curve
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import LabelBinarizer, StandardScaler
+from sklearn.svm import OneClassSVM
+from sklearn.utils import shuffle
 
 font = {"weight": "normal", "size": 15}
diff --git a/benchmarks/bench_plot_incremental_pca.py b/benchmarks/bench_plot_incremental_pca.py
index 0f42e4b630f1d..49b87c8c7060a 100644
--- a/benchmarks/bench_plot_incremental_pca.py
+++ b/benchmarks/bench_plot_incremental_pca.py
@@ -7,13 +7,15 @@
 
 """
 
-import numpy as np
 import gc
-from time import time
 from collections import defaultdict
+from time import time
+
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import fetch_lfw_people
-from sklearn.decomposition import IncrementalPCA, PCA
+from sklearn.decomposition import PCA, IncrementalPCA
 
 
 def plot_results(X, y, label):
diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py
index c372ee07117fc..c996c9c09520f 100644
--- a/benchmarks/bench_plot_lasso_path.py
+++ b/benchmarks/bench_plot_lasso_path.py
@@ -2,16 +2,15 @@
 The input data is mostly low rank but is a fat infinite tail.
 """
-from collections import defaultdict
 import gc
 import sys
+from collections import defaultdict
 from time import time
 
 import numpy as np
 
-from sklearn.linear_model import lars_path, lars_path_gram
-from sklearn.linear_model import lasso_path
 from sklearn.datasets import make_regression
+from sklearn.linear_model import lars_path, lars_path_gram, lasso_path
 
 
 def compute_bench(samples_range, features_range):
diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py
index c6e5541eda6f3..2d9cf2b08b71d 100644
--- a/benchmarks/bench_plot_neighbors.py
+++ b/benchmarks/bench_plot_neighbors.py
@@ -3,11 +3,11 @@
 """
 from time import time
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib import ticker
 
-from sklearn import neighbors, datasets
+from sklearn import datasets, neighbors
 
 
 def get_data(N, D, dataset="dense"):
diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py
index 78d6ad875cc34..d23191df0fbc9 100644
--- a/benchmarks/bench_plot_nmf.py
+++ b/benchmarks/bench_plot_nmf.py
@@ -6,28 +6,25 @@
 # Anthony Di Franco (projected gradient, Python and NumPy port)
 # License: BSD 3 clause
 
-from time import time
+import numbers
 import sys
 import warnings
-import numbers
+from time import time
 
-import numpy as np
 import matplotlib.pyplot as plt
-from joblib import Memory
+import numpy as np
 import pandas
+from joblib import Memory
 
-from sklearn.utils._testing import ignore_warnings
-from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.decomposition import NMF
-from sklearn.decomposition._nmf import _initialize_nmf
-from sklearn.decomposition._nmf import _beta_divergence
-from sklearn.decomposition._nmf import _check_init
+from sklearn.decomposition._nmf import _beta_divergence, _check_init, _initialize_nmf
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils.extmath import safe_sparse_dot, squared_norm
+from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.utils import check_array
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.extmath import safe_sparse_dot, squared_norm
 from sklearn.utils.validation import check_is_fitted, check_non_negative
-
 
 mem = Memory(cachedir=".", verbose=0)
 
 ###################
diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py
index a800b3ebe2ba9..ec1bf3281f3a4 100644
--- a/benchmarks/bench_plot_omp_lars.py
+++ b/benchmarks/bench_plot_omp_lars.py
@@ -9,8 +9,8 @@
 
 import numpy as np
 
-from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp
 from sklearn.datasets import make_sparse_coded_signal
+from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp
 
 
 def compute_bench(samples_range, features_range):
diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py
index a41e3fab20589..ca12972f9be6c 100644
--- a/benchmarks/bench_plot_parallel_pairwise.py
+++ b/benchmarks/bench_plot_parallel_pairwise.py
@@ -4,9 +4,8 @@
 
 import matplotlib.pyplot as plt
 
+from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels
 from sklearn.utils import check_random_state
-from sklearn.metrics.pairwise import pairwise_distances
-from sklearn.metrics.pairwise import pairwise_kernels
 
 
 def plot(func):
diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py
index b21589263a49f..ad89d974f3d93 100644
--- a/benchmarks/bench_plot_polynomial_kernel_approximation.py
+++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py
@@ -42,21 +42,21 @@
 # License: BSD 3 clause
 
 # Load data manipulation functions
-from sklearn.datasets import load_digits
-from sklearn.model_selection import train_test_split
+# Will use this for timing results
+from time import time
 
 # Some common libraries
 import matplotlib.pyplot as plt
 import numpy as np
 
-# Will use this for timing results
-from time import time
-
-# Import SVM classifiers and feature map approximation algorithms
-from sklearn.svm import LinearSVC, SVC
+from sklearn.datasets import load_digits
 from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch
+from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
 
+# Import SVM classifiers and feature map approximation algorithms
+from sklearn.svm import SVC, LinearSVC
+
 # Split data in train and test sets
 X, y = load_digits()["data"], load_digits()["target"]
 X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)
diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py
index 2020096a21b88..9ac4e714cb7dc 100644
--- a/benchmarks/bench_plot_randomized_svd.py
+++ b/benchmarks/bench_plot_randomized_svd.py
@@ -65,28 +65,29 @@
 
 # Author: Giorgio Patrini
 
-import numpy as np
-import scipy as sp
-import matplotlib.pyplot as plt
-
 import gc
+import os.path
 import pickle
-from time import time
 from collections import defaultdict
-import os.path
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy as sp
 
-from sklearn.utils._arpack import _init_arpack_v0
-from sklearn.utils import gen_batches
-from sklearn.utils.validation import check_random_state
-from sklearn.utils.extmath import randomized_svd
-from sklearn.datasets import make_low_rank_matrix, make_sparse_uncorrelated
 from sklearn.datasets import (
-    fetch_lfw_people,
-    fetch_openml,
     fetch_20newsgroups_vectorized,
+    fetch_lfw_people,
     fetch_olivetti_faces,
+    fetch_openml,
     fetch_rcv1,
+    make_low_rank_matrix,
+    make_sparse_uncorrelated,
 )
+from sklearn.utils import gen_batches
+from sklearn.utils._arpack import _init_arpack_v0
+from sklearn.utils.extmath import randomized_svd
+from sklearn.utils.validation import check_random_state
 
 try:
     import fbpca
diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py
index fc370d1073be1..abd2c6fe9d4d4 100644
--- a/benchmarks/bench_plot_svd.py
+++ b/benchmarks/bench_plot_svd.py
@@ -3,13 +3,14 @@
 The data is mostly low rank but is a fat infinite tail.
 """
 import gc
-from time import time
-import numpy as np
 from collections import defaultdict
+from time import time
 
+import numpy as np
 from scipy.linalg import svd
-from sklearn.utils.extmath import randomized_svd
+
 from sklearn.datasets import make_low_rank_matrix
+from sklearn.utils.extmath import randomized_svd
 
 
 def compute_bench(samples_range, features_range, n_iter=3, rank=50):
diff --git a/benchmarks/bench_plot_ward.py b/benchmarks/bench_plot_ward.py
index 696e833eede20..fe5cee201dff4 100644
--- a/benchmarks/bench_plot_ward.py
+++ b/benchmarks/bench_plot_ward.py
@@ -4,9 +4,9 @@
 
 import time
 
+import matplotlib.pyplot as plt
 import numpy as np
 from scipy.cluster import hierarchy
-import matplotlib.pyplot as plt
 
 from sklearn.cluster import AgglomerativeClustering
diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py
index 89a4550944f3f..bd8c62ecba484 100644
--- a/benchmarks/bench_random_projections.py
+++ b/benchmarks/bench_random_projections.py
@@ -6,19 +6,19 @@
 Benchmarks for random projections.
 """
+import collections
 import gc
-import sys
 import optparse
+import sys
 from datetime import datetime
-import collections
 
 import numpy as np
 import scipy.sparse as sp
 
 from sklearn import clone
 from sklearn.random_projection import (
-    SparseRandomProjection,
     GaussianRandomProjection,
+    SparseRandomProjection,
     johnson_lindenstrauss_min_dim,
 )
diff --git a/benchmarks/bench_rcv1_logreg_convergence.py b/benchmarks/bench_rcv1_logreg_convergence.py
index 2254ab81f30a4..166c6c2f5f9d1 100644
--- a/benchmarks/bench_rcv1_logreg_convergence.py
+++ b/benchmarks/bench_rcv1_logreg_convergence.py
@@ -3,14 +3,15 @@
 #
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
-from joblib import Memory
-import numpy as np
 import gc
 import time
 
-from sklearn.linear_model import LogisticRegression, SGDClassifier
+import matplotlib.pyplot as plt
+import numpy as np
+from joblib import Memory
+
 from sklearn.datasets import fetch_rcv1
+from sklearn.linear_model import LogisticRegression, SGDClassifier
 from sklearn.linear_model._sag import get_auto_step_size
 
 try:
diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py
index 340549ef240e1..dc2ed093f11d0 100644
--- a/benchmarks/bench_saga.py
+++ b/benchmarks/bench_saga.py
@@ -4,24 +4,24 @@
 in using multinomial logistic regression in term of learning time.
 """
 import json
-import time
 import os
+import time
 
-from sklearn.utils.parallel import delayed, Parallel
 import matplotlib.pyplot as plt
 import numpy as np
 
 from sklearn.datasets import (
+    fetch_20newsgroups_vectorized,
     fetch_rcv1,
-    load_iris,
     load_digits,
-    fetch_20newsgroups_vectorized,
+    load_iris,
 )
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelBinarizer, LabelEncoder
 from sklearn.utils.extmath import safe_sparse_dot, softmax
+from sklearn.utils.parallel import Parallel, delayed
 
 
 def fit_single(
diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py
index 10baad5a8495f..743292ca5fa61 100644
--- a/benchmarks/bench_sample_without_replacement.py
+++ b/benchmarks/bench_sample_without_replacement.py
@@ -3,14 +3,14 @@
 """
 import gc
-import sys
+import operator
 import optparse
+import random
+import sys
 from datetime import datetime
-import operator
 
 import matplotlib.pyplot as plt
 import numpy as np
-import random
 
 from sklearn.utils.random import sample_without_replacement
diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py
index 47dd9e9fc758b..4b1b902795feb 100644
--- a/benchmarks/bench_sgd_regression.py
+++ b/benchmarks/bench_sgd_regression.py
@@ -1,16 +1,15 @@
 # Author: Peter Prettenhofer
 # License: BSD 3 clause
 
-import numpy as np
-import matplotlib.pyplot as plt
-
 import gc
-
 from time import time
 
-from sklearn.linear_model import Ridge, SGDRegressor, ElasticNet
-from sklearn.metrics import mean_squared_error
+import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import make_regression
+from sklearn.linear_model import ElasticNet, Ridge, SGDRegressor
+from sklearn.metrics import mean_squared_error
 
 """
 Benchmark for SGD regression
diff --git a/benchmarks/bench_sparsify.py b/benchmarks/bench_sparsify.py
index f1aa482b8b732..1832ca40c6ddb 100644
--- a/benchmarks/bench_sparsify.py
+++ b/benchmarks/bench_sparsify.py
@@ -43,8 +43,9 @@
 60 300 381409 1271.4 97.1 clf.predict(X_test_sparse)
 """
 
-from scipy.sparse import csr_matrix
 import numpy as np
+from scipy.sparse import csr_matrix
+
 from sklearn.linear_model import SGDRegressor
 from sklearn.metrics import r2_score
diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py
index 6d75d57658500..31d4141d1af97 100644
--- a/benchmarks/bench_text_vectorizers.py
+++ b/benchmarks/bench_text_vectorizers.py
@@ -8,8 +8,8 @@
   * psutil (optional, but recommended)
 
 """
-import timeit
 import itertools
+import timeit
 
 import numpy as np
 import pandas as pd
@@ -18,8 +18,8 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import (
     CountVectorizer,
-    TfidfVectorizer,
     HashingVectorizer,
+    TfidfVectorizer,
 )
 
 n_repeat = 3
diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py
index c23ef627e237e..29cd7584432b7 100644
--- a/benchmarks/bench_tree.py
+++ b/benchmarks/bench_tree.py
@@ -13,11 +13,12 @@
 training set, classify a sample and plot the time taken as a function of
 the number of dimensions.
""" -import numpy as np -import matplotlib.pyplot as plt import gc from datetime import datetime +import matplotlib.pyplot as plt +import numpy as np + # to store the results scikit_classifier_results = [] scikit_regressor_results = [] diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index e399e891cb94e..39462b33d9655 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -7,18 +7,19 @@ # License: BSD 3 clause +import argparse +import json import os import os.path as op from time import time + import numpy as np -import json -import argparse from joblib import Memory from sklearn.datasets import fetch_openml +from sklearn.decomposition import PCA from sklearn.manifold import TSNE from sklearn.neighbors import NearestNeighbors -from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils import shuffle as _shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads diff --git a/benchmarks/plot_tsne_mnist.py b/benchmarks/plot_tsne_mnist.py index d32e3dd769d6a..fff71eed0a26c 100644 --- a/benchmarks/plot_tsne_mnist.py +++ b/benchmarks/plot_tsne_mnist.py @@ -1,9 +1,8 @@ -import matplotlib.pyplot as plt -import numpy as np -import os.path as op - import argparse +import os.path as op +import matplotlib.pyplot as plt +import numpy as np LOG_DIR = "mnist_tsne_output" diff --git a/build_tools/azure/get_commit_message.py b/build_tools/azure/get_commit_message.py index 239da5b8c4498..0b1246b8d2724 100644 --- a/build_tools/azure/get_commit_message.py +++ b/build_tools/azure/get_commit_message.py @@ -1,6 +1,6 @@ +import argparse import os import subprocess -import argparse def get_commit_message(): diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index dfcc600957469..345e08b4bece4 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -4,9 +4,9 @@ import json import re import sys +from urllib.request import urlopen from sklearn.utils.fixes import parse_version -from urllib.request import urlopen def json_urlread(url): diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index d4da0db5be3c1..c930a9b2956d1 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -6,12 +6,13 @@ The table should be updated for each new inclusion in the teams. Generating the table requires admin rights. 
""" -import sys -import requests import getpass +import sys import time -from pathlib import Path from os import path +from pathlib import Path + +import requests print("user:", file=sys.stderr) user = input() diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index 99d319cba4dc5..3860d3e81adb7 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -1,8 +1,9 @@ """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config.""" -import yaml -from pathlib import Path import sys +from pathlib import Path + +import yaml gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml" with gh_wheel_path.open("r") as f: diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py index 2997688423b84..3bc1aceb3437c 100644 --- a/build_tools/github/vendor.py +++ b/build_tools/github/vendor.py @@ -7,7 +7,6 @@ import sys import textwrap - TARGET_FOLDER = op.join("sklearn", ".libs") DISTRIBUTOR_INIT = op.join("sklearn", "_distributor_init.py") VCOMP140_SRC_PATH = "C:\\Windows\\System32\\vcomp140.dll" diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index a4a5c92b077d2..960c01d4383b8 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -29,20 +29,18 @@ """ +import json +import logging import re +import shlex import subprocess import sys -from pathlib import Path -import shlex -import json -import logging from importlib.metadata import version - -from packaging.version import Version +from pathlib import Path import click - from jinja2 import Environment +from packaging.version import Version logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/doc/conf.py b/doc/conf.py index 6ac1b4e231822..73281f02899d4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,14 +10,15 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os -import warnings import re +import sys +import warnings from datetime import datetime -from sklearn.externals._packaging.version import parse -from pathlib import Path from io import StringIO +from pathlib import Path + +from sklearn.externals._packaging.version import parse # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory @@ -25,10 +26,10 @@ # absolute, like shown here. 
sys.path.insert(0, os.path.abspath("sphinxext")) -from github_link import make_linkcode_resolve import sphinx_gallery +from github_link import make_linkcode_resolve +from sphinx_gallery.notebook import add_code_cell, add_markdown_cell from sphinx_gallery.sorting import ExampleTitleSortKey -from sphinx_gallery.notebook import add_markdown_cell, add_code_cell try: # Configure plotly to integrate its output into the HTML pages generated by diff --git a/doc/conftest.py b/doc/conftest.py index 73848ccf392fb..ca94ad1ed2b60 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -1,16 +1,14 @@ import os -from os.path import exists -from os.path import join -from os import environ import warnings +from os import environ +from os.path import exists, join -from sklearn.utils import IS_PYPY -from sklearn.utils._testing import SkipTest -from sklearn.utils._testing import check_skip_network -from sklearn.utils.fixes import parse_version from sklearn.datasets import get_data_home from sklearn.datasets._base import _pkl_filepath from sklearn.datasets._twenty_newsgroups import CACHE_NAME +from sklearn.utils import IS_PYPY +from sklearn.utils._testing import SkipTest, check_skip_network +from sklearn.utils.fixes import parse_version def setup_labeled_faces(): diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py index e8f94506daaa5..89d7077bce2b5 100755 --- a/doc/sphinxext/allow_nan_estimators.py +++ b/doc/sphinxext/allow_nan_estimators.py @@ -1,11 +1,12 @@ -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _construct_instance -from sklearn.utils._testing import SkipTest -from docutils import nodes from contextlib import suppress +from docutils import nodes from docutils.parsers.rst import Directive +from sklearn.utils import all_estimators +from sklearn.utils._testing import SkipTest +from sklearn.utils.estimator_checks import _construct_instance + class AllowNanEstimators(Directive): @staticmethod diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py index 7d188969bb778..32e905fe650ea 100644 --- a/doc/sphinxext/doi_role.py +++ b/doc/sphinxext/doi_role.py @@ -15,7 +15,6 @@ """ from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py index 3992d814b825e..d3e43c8ed0f5e 100644 --- a/doc/sphinxext/github_link.py +++ b/doc/sphinxext/github_link.py @@ -1,9 +1,9 @@ -from operator import attrgetter import inspect -import subprocess import os +import subprocess import sys from functools import partial +from operator import attrgetter REVISION_CMD = "git rev-parse --short HEAD" diff --git a/examples/applications/plot_cyclical_feature_engineering.py b/examples/applications/plot_cyclical_feature_engineering.py index ecd270354ab76..12e285096726d 100644 --- a/examples/applications/plot_cyclical_feature_engineering.py +++ b/examples/applications/plot_cyclical_feature_engineering.py @@ -35,7 +35,6 @@ # demand around the middle of the days: import matplotlib.pyplot as plt - fig, ax = plt.subplots(figsize=(12, 4)) average_week_demand = df.groupby(["weekday", "hour"])["count"].mean() average_week_demand.plot(ax=ax) @@ -181,12 +180,11 @@ # # The numerical variables need no preprocessing and, for the sake of simplicity, # we only try the default hyper-parameters for this model: -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OrdinalEncoder from sklearn.compose import ColumnTransformer from sklearn.ensemble import 
HistGradientBoostingRegressor from sklearn.model_selection import cross_validate - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OrdinalEncoder categorical_columns = [ "weather", @@ -262,11 +260,10 @@ def evaluate(model, X, y, cv): # For consistency, we scale the numerical features to the same 0-1 range using # class:`sklearn.preprocessing.MinMaxScaler`, although in this case it does not # impact the results much because they are already on comparable scales: -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import MinMaxScaler -from sklearn.linear_model import RidgeCV import numpy as np +from sklearn.linear_model import RidgeCV +from sklearn.preprocessing import MinMaxScaler, OneHotEncoder one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False) alphas = np.logspace(-6, 6, 25) @@ -619,9 +616,8 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # However, it is possible to use the `PolynomialFeatures` class on coarse # grained spline encoded hours to model the "workingday"/"hours" interaction # explicitly without introducing too many new variables: -from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import FeatureUnion - +from sklearn.preprocessing import PolynomialFeatures hour_workday_interaction = make_pipeline( ColumnTransformer( @@ -668,7 +664,6 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # polynomial kernel expansion. Let us try the latter: from sklearn.kernel_approximation import Nystroem - cyclic_spline_poly_pipeline = make_pipeline( cyclic_spline_transformer, Nystroem(kernel="poly", degree=2, n_components=300, random_state=0), diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py index 72637b6ab036f..bd8d5b1b7b037 100644 --- a/examples/applications/plot_digits_denoising.py +++ b/examples/applications/plot_digits_denoising.py @@ -32,9 +32,10 @@ # :func:`~sklearn.datasets.fetch_openml` to get this dataset. In addition, we # normalize the dataset such that all pixel values are in the range (0, 1). 
import numpy as np + from sklearn.datasets import fetch_openml -from sklearn.preprocessing import MinMaxScaler from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MinMaxScaler X, y = fetch_openml(data_id=41082, as_frame=False, return_X_y=True, parser="pandas") X = MinMaxScaler().fit_transform(X) diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py index 878d889f52ce3..1ff4399d60739 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -13,18 +13,16 @@ """ # %% from time import time + import matplotlib.pyplot as plt +from scipy.stats import loguniform -from sklearn.model_selection import train_test_split -from sklearn.model_selection import RandomizedSearchCV from sklearn.datasets import fetch_lfw_people -from sklearn.metrics import classification_report -from sklearn.metrics import ConfusionMatrixDisplay -from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA +from sklearn.metrics import ConfusionMatrixDisplay, classification_report +from sklearn.model_selection import RandomizedSearchCV, train_test_split +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC -from scipy.stats import loguniform - # %% # Download the data, if not already on disk and load it as numpy arrays diff --git a/examples/applications/plot_model_complexity_influence.py b/examples/applications/plot_model_complexity_influence.py index 812539aa1ff46..f83be241230c3 100644 --- a/examples/applications/plot_model_complexity_influence.py +++ b/examples/applications/plot_model_complexity_influence.py @@ -42,16 +42,16 @@ # License: BSD 3 clause import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np from sklearn import datasets -from sklearn.model_selection import train_test_split -from sklearn.metrics import mean_squared_error -from sklearn.svm import NuSVR from sklearn.ensemble import GradientBoostingRegressor from sklearn.linear_model import SGDClassifier -from sklearn.metrics import hamming_loss +from sklearn.metrics import hamming_loss, mean_squared_error +from sklearn.model_selection import train_test_split +from sklearn.svm import NuSVR # Initialize random generator np.random.seed(0) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index 212cbda9cc71e..08ae3000c391c 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -19,24 +19,22 @@ # License: BSD 3 clause import itertools -from pathlib import Path -from hashlib import sha256 import re +import sys import tarfile import time -import sys +from hashlib import sha256 +from html.parser import HTMLParser +from pathlib import Path +from urllib.request import urlretrieve -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib import rcParams -from html.parser import HTMLParser -from urllib.request import urlretrieve from sklearn.datasets import get_data_home from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.linear_model import SGDClassifier -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import Perceptron +from sklearn.linear_model import PassiveAggressiveClassifier, Perceptron, SGDClassifier from sklearn.naive_bayes import MultinomialNB diff --git 
a/examples/applications/plot_outlier_detection_wine.py b/examples/applications/plot_outlier_detection_wine.py index 45e4c64d9fcc4..c4adfa222a5dd 100644 --- a/examples/applications/plot_outlier_detection_wine.py +++ b/examples/applications/plot_outlier_detection_wine.py @@ -37,12 +37,13 @@ # Author: Virgile Fritsch # License: BSD 3 clause +import matplotlib.font_manager +import matplotlib.pyplot as plt import numpy as np + from sklearn.covariance import EllipticEnvelope -from sklearn.svm import OneClassSVM -import matplotlib.pyplot as plt -import matplotlib.font_manager from sklearn.datasets import load_wine +from sklearn.svm import OneClassSVM # Define "classifiers" to be used classifiers = { diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index 9b99bcbfdfaf1..8fce81fb9fb4e 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -16,19 +16,18 @@ # Authors: Eustache Diemert # License: BSD 3 clause +import gc +import time from collections import defaultdict -import time -import gc -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split from sklearn.datasets import make_regression from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import Ridge -from sklearn.linear_model import SGDRegressor +from sklearn.linear_model import Ridge, SGDRegressor +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVR from sklearn.utils import shuffle diff --git a/examples/applications/plot_species_distribution_modeling.py b/examples/applications/plot_species_distribution_modeling.py index e3d5778f3307d..bdf50918840c2 100644 --- a/examples/applications/plot_species_distribution_modeling.py +++ b/examples/applications/plot_species_distribution_modeling.py @@ -43,12 +43,12 @@ from time import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.utils import Bunch +from sklearn import metrics, svm from sklearn.datasets import fetch_species_distributions -from sklearn import svm, metrics +from sklearn.utils import Bunch # if basemap is available, we'll use it. # otherwise, we'll improvise later... diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index 91168f434338e..cdf5a36074923 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -24,6 +24,7 @@ # `alphavantage.co `_. 
import sys + import numpy as np import pandas as pd diff --git a/examples/applications/plot_tomography_l1_reconstruction.py b/examples/applications/plot_tomography_l1_reconstruction.py index 9ac351c12206c..d851613402571 100644 --- a/examples/applications/plot_tomography_l1_reconstruction.py +++ b/examples/applications/plot_tomography_l1_reconstruction.py @@ -39,12 +39,11 @@ class :class:`~sklearn.linear_model.Lasso`, that uses the coordinate descent # Author: Emmanuelle Gouillart # License: BSD 3 clause -import numpy as np -from scipy import sparse -from scipy import ndimage -from sklearn.linear_model import Lasso -from sklearn.linear_model import Ridge import matplotlib.pyplot as plt +import numpy as np +from scipy import ndimage, sparse + +from sklearn.linear_model import Lasso, Ridge def _weights(x, dx=1, orig=0): diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index 38945241ab68b..0385fd7c89333 100644 --- a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -27,11 +27,12 @@ # License: BSD 3 clause from time import time + import matplotlib.pyplot as plt -from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer -from sklearn.decomposition import NMF, MiniBatchNMF, LatentDirichletAllocation from sklearn.datasets import fetch_20newsgroups +from sklearn.decomposition import NMF, LatentDirichletAllocation, MiniBatchNMF +from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer n_samples = 2000 n_features = 1000 diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py index c8019fa72ae91..cd0e3b6101bb6 100644 --- a/examples/applications/svm_gui.py +++ b/examples/applications/svm_gui.py @@ -30,13 +30,13 @@ from matplotlib.backends.backend_tkagg import ( NavigationToolbar2TkAgg as NavigationToolbar2Tk, ) -from matplotlib.figure import Figure -from matplotlib.contour import ContourSet - import sys -import numpy as np import tkinter as Tk +import numpy as np +from matplotlib.contour import ContourSet +from matplotlib.figure import Figure + from sklearn import svm from sklearn.datasets import dump_svmlight_file diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index fcc337b0a4e00..0be1661d7ed5c 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -33,19 +33,17 @@ # Author: Olivier Grisel # License: BSD 3 clause -from bz2 import BZ2File import os +from bz2 import BZ2File from datetime import datetime from pprint import pprint from time import time +from urllib.request import urlopen import numpy as np - from scipy import sparse from sklearn.decomposition import randomized_svd -from urllib.request import urlopen - # %% # Download data, if not already on disk diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py index a54f7099c9a74..0fef820bb9f2a 100644 --- a/examples/bicluster/plot_bicluster_newsgroups.py +++ b/examples/bicluster/plot_bicluster_newsgroups.py @@ -23,14 +23,13 @@ """ -from collections import defaultdict import operator +from collections import defaultdict from time import time import numpy as np -from sklearn.cluster import SpectralCoclustering -from sklearn.cluster import MiniBatchKMeans +from sklearn.cluster import MiniBatchKMeans, 
SpectralCoclustering from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.cluster import v_measure_score diff --git a/examples/bicluster/plot_spectral_biclustering.py b/examples/bicluster/plot_spectral_biclustering.py index baf0f0ccbc58f..18ff7ae67b83a 100644 --- a/examples/bicluster/plot_spectral_biclustering.py +++ b/examples/bicluster/plot_spectral_biclustering.py @@ -32,9 +32,10 @@ # # As you can see, the data is distributed over 12 cluster cells and is # relatively well distinguishable. -from sklearn.datasets import make_checkerboard from matplotlib import pyplot as plt +from sklearn.datasets import make_checkerboard + n_clusters = (4, 3) data, rows, columns = make_checkerboard( shape=(300, 300), n_clusters=n_clusters, noise=10, shuffle=False, random_state=42 diff --git a/examples/bicluster/plot_spectral_coclustering.py b/examples/bicluster/plot_spectral_coclustering.py index 0df275e83e3bd..92b10d93956e7 100644 --- a/examples/bicluster/plot_spectral_coclustering.py +++ b/examples/bicluster/plot_spectral_coclustering.py @@ -21,8 +21,8 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.datasets import make_biclusters from sklearn.cluster import SpectralCoclustering +from sklearn.datasets import make_biclusters from sklearn.metrics import consensus_score data, rows, columns = make_biclusters( diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index 75d1ea15b8fbd..f928ae631b78b 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -91,8 +91,8 @@ # %% # Plot data and the predicted probabilities # ----------------------------------------- -from matplotlib import cm import matplotlib.pyplot as plt +from matplotlib import cm plt.figure() y_unique = np.unique(y) diff --git a/examples/calibration/plot_calibration_curve.py b/examples/calibration/plot_calibration_curve.py index dc4e85a5f1678..915d3b7c20cc9 100644 --- a/examples/calibration/plot_calibration_curve.py +++ b/examples/calibration/plot_calibration_curve.py @@ -140,11 +140,11 @@ import pandas as pd from sklearn.metrics import ( - precision_score, - recall_score, - f1_score, brier_score_loss, + f1_score, log_loss, + precision_score, + recall_score, roc_auc_score, ) diff --git a/examples/calibration/plot_calibration_multiclass.py b/examples/calibration/plot_calibration_multiclass.py index 24962a786ea03..fc6349f3dea5f 100644 --- a/examples/calibration/plot_calibration_multiclass.py +++ b/examples/calibration/plot_calibration_multiclass.py @@ -31,6 +31,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3). # License: BSD Style. 
 import numpy as np
+
 from sklearn.datasets import make_blobs

 np.random.seed(0)

diff --git a/examples/classification/plot_classification_probability.py b/examples/classification/plot_classification_probability.py
index 87c3f51db5eb2..ec5887b63914d 100644
--- a/examples/classification/plot_classification_probability.py
+++ b/examples/classification/plot_classification_probability.py
@@ -23,12 +23,12 @@
 import matplotlib.pyplot as plt
 import numpy as np

-from sklearn.metrics import accuracy_score
-from sklearn.linear_model import LogisticRegression
-from sklearn.svm import SVC
+from sklearn import datasets
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF
-from sklearn import datasets
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+from sklearn.svm import SVC

 iris = datasets.load_iris()
 X = iris.data[:, 0:2]  # we only take the first two features for visualization
diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py
index 71e8318aa0acb..75164cff8b492 100644
--- a/examples/classification/plot_classifier_comparison.py
+++ b/examples/classification/plot_classifier_comparison.py
@@ -24,23 +24,24 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.colors import ListedColormap
+
+from sklearn.datasets import make_circles, make_classification, make_moons
+from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
+from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.gaussian_process.kernels import RBF
+from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import make_pipeline
-from sklearn.datasets import make_moons, make_circles, make_classification
-from sklearn.neural_network import MLPClassifier
+from sklearn.naive_bayes import GaussianNB
 from sklearn.neighbors import KNeighborsClassifier
+from sklearn.neural_network import MLPClassifier
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC
-from sklearn.gaussian_process import GaussianProcessClassifier
-from sklearn.gaussian_process.kernels import RBF
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
-from sklearn.naive_bayes import GaussianNB
-from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
-from sklearn.inspection import DecisionBoundaryDisplay

 names = [
     "Nearest Neighbors",
diff --git a/examples/classification/plot_digits_classification.py b/examples/classification/plot_digits_classification.py
index f760916d1f66e..d6208400d5416 100644
--- a/examples/classification/plot_digits_classification.py
+++ b/examples/classification/plot_digits_classification.py
@@ -15,7 +15,7 @@
 import matplotlib.pyplot as plt

 # Import datasets, classifiers and performance metrics
-from sklearn import datasets, svm, metrics
+from sklearn import datasets, metrics, svm
 from sklearn.model_selection import train_test_split

 ###############################################################################
diff --git a/examples/classification/plot_lda.py b/examples/classification/plot_lda.py
index 322cc8bb4007c..b24479b91f5ea 100644
--- a/examples/classification/plot_lda.py
+++ b/examples/classification/plot_lda.py
@@ -8,13 +8,12 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

+from sklearn.covariance import OAS
 from sklearn.datasets import make_blobs
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
-from sklearn.covariance import OAS
-
 n_train = 20  # samples for training
 n_test = 200  # samples for testing
diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py
index 712354f7f7f44..71230d0a9bcd9 100644
--- a/examples/classification/plot_lda_qda.py
+++ b/examples/classification/plot_lda_qda.py
@@ -15,8 +15,8 @@ class has its own standard deviation with QDA.
 # Colormap
 # --------

-import matplotlib.pyplot as plt
 import matplotlib as mpl
+import matplotlib.pyplot as plt
 from matplotlib import colors

 cmap = colors.LinearSegmentedColormap(
@@ -172,8 +172,10 @@ def plot_qda_cov(qda, splot):
         fontsize=15,
     )

-from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
-from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+from sklearn.discriminant_analysis import (
+    LinearDiscriminantAnalysis,
+    QuadraticDiscriminantAnalysis,
+)

 for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
     # Linear Discriminant Analysis
diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py
index d2bc345c00b3e..5816ae298f419 100644
--- a/examples/cluster/plot_affinity_propagation.py
+++ b/examples/cluster/plot_affinity_propagation.py
@@ -10,8 +10,8 @@
 """

 import numpy as np

-from sklearn.cluster import AffinityPropagation
 from sklearn import metrics
+from sklearn.cluster import AffinityPropagation
 from sklearn.datasets import make_blobs

 # %%
diff --git a/examples/cluster/plot_agglomerative_clustering.py b/examples/cluster/plot_agglomerative_clustering.py
index 5bb87a9386bf8..d5e7a8168a648 100644
--- a/examples/cluster/plot_agglomerative_clustering.py
+++ b/examples/cluster/plot_agglomerative_clustering.py
@@ -28,6 +28,7 @@
 # License: BSD 3 clause

 import time
+
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/cluster/plot_agglomerative_clustering_metrics.py b/examples/cluster/plot_agglomerative_clustering_metrics.py
index f1a77d442dbe8..8eb2ea3f7285f 100644
--- a/examples/cluster/plot_agglomerative_clustering_metrics.py
+++ b/examples/cluster/plot_agglomerative_clustering_metrics.py
@@ -37,8 +37,8 @@
 # Author: Gael Varoquaux
 # License: BSD 3-Clause or CC-0

-import matplotlib.pyplot as plt
 import matplotlib.patheffects as PathEffects
+import matplotlib.pyplot as plt
 import numpy as np

 from sklearn.cluster import AgglomerativeClustering
diff --git a/examples/cluster/plot_agglomerative_dendrogram.py b/examples/cluster/plot_agglomerative_dendrogram.py
index 2de5030d68f6d..20c22f4f0bb39 100644
--- a/examples/cluster/plot_agglomerative_dendrogram.py
+++ b/examples/cluster/plot_agglomerative_dendrogram.py
@@ -10,11 +10,11 @@
 """

 import numpy as np
-
 from matplotlib import pyplot as plt
 from scipy.cluster.hierarchy import dendrogram
-from sklearn.datasets import load_iris
+
 from sklearn.cluster import AgglomerativeClustering
+from sklearn.datasets import load_iris


 def plot_dendrogram(model, **kwargs):
diff --git a/examples/cluster/plot_birch_vs_minibatchkmeans.py b/examples/cluster/plot_birch_vs_minibatchkmeans.py
index 3d4185dc9368a..c9c213c948913 100644
--- a/examples/cluster/plot_birch_vs_minibatchkmeans.py
+++ b/examples/cluster/plot_birch_vs_minibatchkmeans.py
@@ -25,17 +25,17 @@
 # Alexandre Gramfort
 # License: BSD 3 clause

-from joblib import cpu_count
 from itertools import cycle
 from time import time
-import numpy as np
-import matplotlib.pyplot as plt
+
 import matplotlib.colors as colors
+import matplotlib.pyplot as plt
+import numpy as np
+from joblib import cpu_count

 from sklearn.cluster import Birch, MiniBatchKMeans
 from sklearn.datasets import make_blobs

-
 # Generate centers for the blobs so that it forms a 10 X 10 grid.
 xx = np.linspace(-22, 22, 10)
 yy = np.linspace(-22, 22, 10)
diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py
index a6be3545e0b27..3aebdffddaf63 100644
--- a/examples/cluster/plot_bisect_kmeans.py
+++ b/examples/cluster/plot_bisect_kmeans.py
@@ -15,9 +15,8 @@
 """

 import matplotlib.pyplot as plt

-from sklearn.datasets import make_blobs
 from sklearn.cluster import BisectingKMeans, KMeans
-
+from sklearn.datasets import make_blobs

 print(__doc__)
diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py
index 843c629374828..27b4a1c46c415 100644
--- a/examples/cluster/plot_cluster_comparison.py
+++ b/examples/cluster/plot_cluster_comparison.py
@@ -26,14 +26,14 @@
 import time
 import warnings
+from itertools import cycle, islice

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn import cluster, datasets, mixture
 from sklearn.neighbors import kneighbors_graph
 from sklearn.preprocessing import StandardScaler
-from itertools import cycle, islice

 np.random.seed(0)
diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py
index 4078d139f8064..b20bc8e38dd78 100644
--- a/examples/cluster/plot_cluster_iris.py
+++ b/examples/cluster/plot_cluster_iris.py
@@ -22,15 +22,15 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt

 # Though the following import is not directly being used, it is required
 # for 3D projection to work with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401
+import numpy as np

-from sklearn.cluster import KMeans
 from sklearn import datasets
+from sklearn.cluster import KMeans

 np.random.seed(5)
diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py
index bec68d1221646..c965dc2bd7ace 100644
--- a/examples/cluster/plot_coin_segmentation.py
+++ b/examples/cluster/plot_coin_segmentation.py
@@ -27,15 +27,14 @@
 import time

+import matplotlib.pyplot as plt
 import numpy as np
 from scipy.ndimage import gaussian_filter
-import matplotlib.pyplot as plt
 from skimage.data import coins
 from skimage.transform import rescale

-from sklearn.feature_extraction import image
 from sklearn.cluster import spectral_clustering
-
+from sklearn.feature_extraction import image

 # load the coins as a numpy array
 orig_coins = coins()
diff --git a/examples/cluster/plot_color_quantization.py b/examples/cluster/plot_color_quantization.py
index ae37673808e56..cc8849b64ab6f 100644
--- a/examples/cluster/plot_color_quantization.py
+++ b/examples/cluster/plot_color_quantization.py
@@ -25,13 +25,15 @@
 #
 # License: BSD 3 clause

-import numpy as np
+from time import time
+
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.cluster import KMeans
-from sklearn.metrics import pairwise_distances_argmin
 from sklearn.datasets import load_sample_image
+from sklearn.metrics import pairwise_distances_argmin
 from sklearn.utils import shuffle
-from time import time

 n_colors = 64
diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py
index c762e0bceae08..0b0bd64ecf62b 100644
--- a/examples/cluster/plot_dbscan.py
+++ b/examples/cluster/plot_dbscan.py
@@ -44,8 +44,9 @@
 # the `labels_` attribute. Noisy samples are given the label math:`-1`.

 import numpy as np
-from sklearn.cluster import DBSCAN
+
 from sklearn import metrics
+from sklearn.cluster import DBSCAN

 db = DBSCAN(eps=0.3, min_samples=10).fit(X)
 labels = db.labels_
diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py
index 627a9a28d7665..faedefb8aeed8 100644
--- a/examples/cluster/plot_digits_agglomeration.py
+++ b/examples/cluster/plot_digits_agglomeration.py
@@ -12,10 +12,10 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

-from sklearn import datasets, cluster
+from sklearn import cluster, datasets
 from sklearn.feature_extraction.image import grid_to_graph

 digits = datasets.load_digits()
diff --git a/examples/cluster/plot_digits_linkage.py b/examples/cluster/plot_digits_linkage.py
index 730f85c543356..ae67bd5d8e0f4 100644
--- a/examples/cluster/plot_digits_linkage.py
+++ b/examples/cluster/plot_digits_linkage.py
@@ -35,7 +35,7 @@
 import numpy as np
 from matplotlib import pyplot as plt

-from sklearn import manifold, datasets
+from sklearn import datasets, manifold

 digits = datasets.load_digits()
 X, y = digits.data, digits.target
diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
index e2273326b9a12..577d65f314337 100644
--- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
+++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py
@@ -21,18 +21,17 @@
 import shutil
 import tempfile

-import numpy as np
 import matplotlib.pyplot as plt
-from scipy import linalg, ndimage
+import numpy as np
 from joblib import Memory
+from scipy import linalg, ndimage

-from sklearn.feature_extraction.image import grid_to_graph
 from sklearn import feature_selection
 from sklearn.cluster import FeatureAgglomeration
+from sklearn.feature_extraction.image import grid_to_graph
 from sklearn.linear_model import BayesianRidge
+from sklearn.model_selection import GridSearchCV, KFold
 from sklearn.pipeline import Pipeline
-from sklearn.model_selection import GridSearchCV
-from sklearn.model_selection import KFold

 # %%
 # Set parameters
diff --git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py
index b97858ff156e8..a4dfcb6c42bbc 100644
--- a/examples/cluster/plot_hdbscan.py
+++ b/examples/cluster/plot_hdbscan.py
@@ -13,11 +13,11 @@
 We first define a couple utility functions for convenience.
""" # %% +import matplotlib.pyplot as plt import numpy as np -from sklearn.cluster import HDBSCAN, DBSCAN +from sklearn.cluster import DBSCAN, HDBSCAN from sklearn.datasets import make_blobs -import matplotlib.pyplot as plt def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax=None): diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py index e395571a1caad..b6464459160e3 100644 --- a/examples/cluster/plot_inductive_clustering.py +++ b/examples/cluster/plot_inductive_clustering.py @@ -24,6 +24,7 @@ # Christos Aridas import matplotlib.pyplot as plt + from sklearn.base import BaseEstimator, clone from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import make_blobs @@ -32,7 +33,6 @@ from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import check_is_fitted - N_SAMPLES = 5000 RANDOM_STATE = 42 diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index bc1f01cb1cdd7..46a7ec6fa58b5 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ b/examples/cluster/plot_kmeans_assumptions.py @@ -21,6 +21,7 @@ # one has to define a linear `transformation`. import numpy as np + from sklearn.datasets import make_blobs n_samples = 1500 diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py index 94bba2a5c52d9..d61ec91d13d52 100644 --- a/examples/cluster/plot_kmeans_digits.py +++ b/examples/cluster/plot_kmeans_digits.py @@ -34,6 +34,7 @@ # to group images such that the handwritten digits on the image are the same. import numpy as np + from sklearn.datasets import load_digits data, labels = load_digits(return_X_y=True) @@ -53,6 +54,7 @@ # * train and time the pipeline fitting; # * measure the performance of the clustering obtained via different metrics. from time import time + from sklearn import metrics from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler diff --git a/examples/cluster/plot_kmeans_plusplus.py b/examples/cluster/plot_kmeans_plusplus.py index 1f3507c0062ac..69ea738635ddf 100644 --- a/examples/cluster/plot_kmeans_plusplus.py +++ b/examples/cluster/plot_kmeans_plusplus.py @@ -10,9 +10,10 @@ """ +import matplotlib.pyplot as plt + from sklearn.cluster import kmeans_plusplus from sklearn.datasets import make_blobs -import matplotlib.pyplot as plt # Generate sample data n_samples = 4000 diff --git a/examples/cluster/plot_kmeans_silhouette_analysis.py b/examples/cluster/plot_kmeans_silhouette_analysis.py index c7d0dc31d4873..c5817a750c2bb 100644 --- a/examples/cluster/plot_kmeans_silhouette_analysis.py +++ b/examples/cluster/plot_kmeans_silhouette_analysis.py @@ -31,14 +31,14 @@ """ -from sklearn.datasets import make_blobs -from sklearn.cluster import KMeans -from sklearn.metrics import silhouette_samples, silhouette_score - -import matplotlib.pyplot as plt import matplotlib.cm as cm +import matplotlib.pyplot as plt import numpy as np +from sklearn.cluster import KMeans +from sklearn.datasets import make_blobs +from sklearn.metrics import silhouette_samples, silhouette_score + # Generating the sample data from make_blobs # This particular setting has one distinct cluster and 3 clusters placed close # together. 
diff --git a/examples/cluster/plot_kmeans_stability_low_dim_dense.py b/examples/cluster/plot_kmeans_stability_low_dim_dense.py index c88cf864506f7..9340239a3d00e 100644 --- a/examples/cluster/plot_kmeans_stability_low_dim_dense.py +++ b/examples/cluster/plot_kmeans_stability_low_dim_dense.py @@ -26,14 +26,12 @@ # Author: Olivier Grisel # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt import matplotlib.cm as cm +import matplotlib.pyplot as plt +import numpy as np -from sklearn.utils import shuffle -from sklearn.utils import check_random_state -from sklearn.cluster import MiniBatchKMeans -from sklearn.cluster import KMeans +from sklearn.cluster import KMeans, MiniBatchKMeans +from sklearn.utils import check_random_state, shuffle random_state = np.random.RandomState(0) diff --git a/examples/cluster/plot_linkage_comparison.py b/examples/cluster/plot_linkage_comparison.py index af4c3cd2894af..dc009d0110f7c 100644 --- a/examples/cluster/plot_linkage_comparison.py +++ b/examples/cluster/plot_linkage_comparison.py @@ -25,13 +25,13 @@ import time import warnings +from itertools import cycle, islice -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import cluster, datasets from sklearn.preprocessing import StandardScaler -from itertools import cycle, islice np.random.seed(0) diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py index 46ded7bc43421..aacbc7f216405 100644 --- a/examples/cluster/plot_mean_shift.py +++ b/examples/cluster/plot_mean_shift.py @@ -12,6 +12,7 @@ """ import numpy as np + from sklearn.cluster import MeanShift, estimate_bandwidth from sklearn.datasets import make_blobs diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py index 7a9d599704059..3a6e8aa63786b 100644 --- a/examples/cluster/plot_mini_batch_kmeans.py +++ b/examples/cluster/plot_mini_batch_kmeans.py @@ -21,6 +21,7 @@ # We start by generating the blobs of data to be clustered. import numpy as np + from sklearn.datasets import make_blobs np.random.seed(0) @@ -35,6 +36,7 @@ # ------------------------------ import time + from sklearn.cluster import KMeans k_means = KMeans(init="k-means++", n_clusters=3, n_init=10) diff --git a/examples/cluster/plot_optics.py b/examples/cluster/plot_optics.py index 7915abd20ce53..5765f8089f3b2 100644 --- a/examples/cluster/plot_optics.py +++ b/examples/cluster/plot_optics.py @@ -20,11 +20,12 @@ # Adrin Jalali # License: BSD 3 clause -from sklearn.cluster import OPTICS, cluster_optics_dbscan import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt import numpy as np +from sklearn.cluster import OPTICS, cluster_optics_dbscan + # Generate sample data np.random.seed(0) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index 0880cdb893839..6fc41f7a5daf2 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -78,9 +78,10 @@ # %% # Here we perform spectral clustering using the arpack solver since amg is # numerically unstable on this example. We then plot the results. 
-from sklearn.cluster import spectral_clustering
 import matplotlib.pyplot as plt

+from sklearn.cluster import spectral_clustering
+
 labels = spectral_clustering(graph, n_clusters=4, eigen_solver="arpack")
 label_im = np.full(mask.shape, -1.0)
 label_im[mask] = labels
diff --git a/examples/cluster/plot_ward_structured_vs_unstructured.py b/examples/cluster/plot_ward_structured_vs_unstructured.py
index 430d00a8b3730..446d744b31e78 100644
--- a/examples/cluster/plot_ward_structured_vs_unstructured.py
+++ b/examples/cluster/plot_ward_structured_vs_unstructured.py
@@ -29,18 +29,14 @@
 # The following import is required
 # for 3D projection to work with matplotlib < 3.2
-
 import mpl_toolkits.mplot3d  # noqa: F401
-
 import numpy as np
-
 # %%
 # Generate data
 # -------------
 #
 # We start by generating the Swiss Roll dataset.
-
 from sklearn.datasets import make_swiss_roll

 n_samples = 1500
diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py
index d4798d828b321..669e817cbf81d 100644
--- a/examples/compose/plot_column_transformer.py
+++ b/examples/compose/plot_column_transformer.py
@@ -24,14 +24,14 @@
 import numpy as np

-from sklearn.preprocessing import FunctionTransformer
+from sklearn.compose import ColumnTransformer
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction import DictVectorizer
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics import classification_report
 from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import FunctionTransformer
 from sklearn.svm import LinearSVC

 ##############################################################################
diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 5ed3239db8478..d7efd033440ce 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -34,12 +34,12 @@
 from sklearn.compose import ColumnTransformer
 from sklearn.datasets import fetch_openml
-from sklearn.pipeline import Pipeline
+from sklearn.feature_selection import SelectPercentile, chi2
 from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import train_test_split, RandomizedSearchCV
-from sklearn.feature_selection import SelectPercentile, chi2
+from sklearn.model_selection import RandomizedSearchCV, train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler

 np.random.seed(0)
diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py
index 47975f84325b8..529366c6244f2 100644
--- a/examples/compose/plot_compare_reduction.py
+++ b/examples/compose/plot_compare_reduction.py
@@ -28,15 +28,16 @@
 # Illustration of ``Pipeline`` and ``GridSearchCV``
 ###############################################################################

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import load_digits
+from sklearn.decomposition import NMF, PCA
+from sklearn.feature_selection import SelectKBest, mutual_info_classif
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
-from sklearn.svm import LinearSVC
-from sklearn.decomposition import PCA, NMF
-from sklearn.feature_selection import SelectKBest, mutual_info_classif
 from sklearn.preprocessing import MinMaxScaler
+from sklearn.svm import LinearSVC

 X, y = load_digits(return_X_y=True)
@@ -103,9 +104,10 @@
 # cache. Hence, use the ``memory`` constructor parameter when the fitting
 # of a transformer is costly.

-from joblib import Memory
 from shutil import rmtree

+from joblib import Memory
+
 # Create a temporary folder to store the transformers of the pipeline
 location = "cachedir"
 memory = Memory(location=location, verbose=10)
diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py
index 640cd6e529a8d..2769422c404a4 100644
--- a/examples/compose/plot_digits_pipe.py
+++ b/examples/compose/plot_digits_pipe.py
@@ -14,15 +14,15 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd

 from sklearn import datasets
 from sklearn.decomposition import PCA
 from sklearn.linear_model import LogisticRegression
-from sklearn.pipeline import Pipeline
 from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler

 # Define a pipeline to search for the best combination of PCA truncation
diff --git a/examples/compose/plot_feature_union.py b/examples/compose/plot_feature_union.py
index e014b8b8808b9..01f7e02bfe44f 100644
--- a/examples/compose/plot_feature_union.py
+++ b/examples/compose/plot_feature_union.py
@@ -20,12 +20,12 @@
 #
 # License: BSD 3 clause

-from sklearn.pipeline import Pipeline, FeatureUnion
-from sklearn.model_selection import GridSearchCV
-from sklearn.svm import SVC
 from sklearn.datasets import load_iris
 from sklearn.decomposition import PCA
 from sklearn.feature_selection import SelectKBest
+from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import FeatureUnion, Pipeline
+from sklearn.svm import SVC

 iris = load_iris()
diff --git a/examples/compose/plot_transformed_target.py b/examples/compose/plot_transformed_target.py
index 1e550ca0ea837..b01c9fbe37934 100644
--- a/examples/compose/plot_transformed_target.py
+++ b/examples/compose/plot_transformed_target.py
@@ -32,6 +32,7 @@
 # (`np.expm1`) will be used to transform the targets before training a linear
 # regression model and using it for prediction.
 import numpy as np
+
 from sklearn.datasets import make_regression

 X, y = make_regression(n_samples=10_000, noise=100, random_state=0)
@@ -42,6 +43,7 @@
 # Below we plot the probability density functions of the target
 # before and after applying the logarithmic functions.
 import matplotlib.pyplot as plt
+
 from sklearn.model_selection import train_test_split

 f, (ax0, ax1) = plt.subplots(1, 2)
diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index be3bf4837eb9f..df9af8ea330ba 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -37,9 +37,10 @@
 # Compute the likelihood on test data
 # -----------------------------------

-from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood
 from scipy import linalg

+from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood
+
 # spanning a range of possible shrinkage coefficient values
 shrinkages = np.logspace(-2, 0, 30)
 negative_logliks = [
@@ -73,8 +74,8 @@
 # are Gaussian, in particular for small samples.
+from sklearn.covariance import OAS, LedoitWolf
 from sklearn.model_selection import GridSearchCV
-from sklearn.covariance import LedoitWolf, OAS

 # GridSearch for an optimal shrinkage coefficient
 tuned_parameters = [{"shrinkage": shrinkages}]
diff --git a/examples/covariance/plot_lw_vs_oas.py b/examples/covariance/plot_lw_vs_oas.py
index 1fd84b180f50a..107f6bd1c29cc 100644
--- a/examples/covariance/plot_lw_vs_oas.py
+++ b/examples/covariance/plot_lw_vs_oas.py
@@ -21,11 +21,11 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
-from scipy.linalg import toeplitz, cholesky
+import numpy as np
+from scipy.linalg import cholesky, toeplitz

-from sklearn.covariance import LedoitWolf, OAS
+from sklearn.covariance import OAS, LedoitWolf

 np.random.seed(0)
 # %%
diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py
index b93d68a269706..bd61e5af22147 100644
--- a/examples/covariance/plot_mahalanobis_distances.py
+++ b/examples/covariance/plot_mahalanobis_distances.py
@@ -103,6 +103,7 @@
 # designed to have a much larger variance in feature 2.

 import matplotlib.pyplot as plt
+
 from sklearn.covariance import EmpiricalCovariance, MinCovDet

 # fit a MCD robust estimator to data
diff --git a/examples/covariance/plot_robust_vs_empirical_covariance.py b/examples/covariance/plot_robust_vs_empirical_covariance.py
index 9111ec82bcbf3..c61a97ddd979b 100644
--- a/examples/covariance/plot_robust_vs_empirical_covariance.py
+++ b/examples/covariance/plot_robust_vs_empirical_covariance.py
@@ -53,9 +53,9 @@
 """

-import numpy as np
-import matplotlib.pyplot as plt
 import matplotlib.font_manager
+import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.covariance import EmpiricalCovariance, MinCovDet
diff --git a/examples/covariance/plot_sparse_cov.py b/examples/covariance/plot_sparse_cov.py
index 96a5486dc964e..a088aeb7e69c0 100644
--- a/examples/covariance/plot_sparse_cov.py
+++ b/examples/covariance/plot_sparse_cov.py
@@ -59,6 +59,7 @@
 # -----------------
 import numpy as np
 from scipy import linalg
+
 from sklearn.datasets import make_sparse_spd_matrix

 n_samples = 60
diff --git a/examples/cross_decomposition/plot_pcr_vs_pls.py b/examples/cross_decomposition/plot_pcr_vs_pls.py
index 529225d11eead..895c75dc1a728 100644
--- a/examples/cross_decomposition/plot_pcr_vs_pls.py
+++ b/examples/cross_decomposition/plot_pcr_vs_pls.py
@@ -41,8 +41,9 @@
 # into PCR and PLS, we fit a PCA estimator to display the two principal
 # components of this dataset, i.e. the two directions that explain the most
 # variance in the data.
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.decomposition import PCA

 rng = np.random.RandomState(0)
@@ -99,12 +100,12 @@
 # For both models, we plot the projected data onto the first component against
 # the target. In both cases, this projected data is what the regressors will
 # use as training data.
+from sklearn.cross_decomposition import PLSRegression
+from sklearn.decomposition import PCA
+from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
-from sklearn.linear_model import LinearRegression
 from sklearn.preprocessing import StandardScaler
-from sklearn.decomposition import PCA
-from sklearn.cross_decomposition import PLSRegression

 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py
index 95ce867011a9a..0fde32cc674a8 100644
--- a/examples/datasets/plot_digits_last_image.py
+++ b/examples/datasets/plot_digits_last_image.py
@@ -18,10 +18,10 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause

-from sklearn import datasets
-
 import matplotlib.pyplot as plt

+from sklearn import datasets
+
 # Load the digits dataset
 digits = datasets.load_digits()
diff --git a/examples/datasets/plot_random_dataset.py b/examples/datasets/plot_random_dataset.py
index 4f3fdbbb11ef5..e5cbdb080b59f 100644
--- a/examples/datasets/plot_random_dataset.py
+++ b/examples/datasets/plot_random_dataset.py
@@ -16,9 +16,7 @@
 import matplotlib.pyplot as plt

-from sklearn.datasets import make_classification
-from sklearn.datasets import make_blobs
-from sklearn.datasets import make_gaussian_quantiles
+from sklearn.datasets import make_blobs, make_classification, make_gaussian_quantiles

 plt.figure(figsize=(8, 8))
 plt.subplots_adjust(bottom=0.05, top=0.9, left=0.05, right=0.95)
diff --git a/examples/datasets/plot_random_multilabel_dataset.py b/examples/datasets/plot_random_multilabel_dataset.py
index f22c7b9695c42..e6e2d6ad9edcf 100644
--- a/examples/datasets/plot_random_multilabel_dataset.py
+++ b/examples/datasets/plot_random_multilabel_dataset.py
@@ -35,8 +35,8 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.datasets import make_multilabel_classification as make_ml_clf
diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py
index 12c091c8e14cb..c29c99b7f3c67 100644
--- a/examples/decomposition/plot_faces_decomposition.py
+++ b/examples/decomposition/plot_faces_decomposition.py
@@ -21,12 +21,11 @@
 import logging

-from numpy.random import RandomState
 import matplotlib.pyplot as plt
+from numpy.random import RandomState

+from sklearn import cluster, decomposition
 from sklearn.datasets import fetch_olivetti_faces
-from sklearn import cluster
-from sklearn import decomposition

 rng = RandomState(0)
diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py
index 8c1529a3256fb..584d6b9509589 100644
--- a/examples/decomposition/plot_ica_blind_source_separation.py
+++ b/examples/decomposition/plot_ica_blind_source_separation.py
@@ -41,7 +41,7 @@
 # Fit ICA and PCA models
 # ----------------------

-from sklearn.decomposition import FastICA, PCA
+from sklearn.decomposition import PCA, FastICA

 # Compute ICA
 ica = FastICA(n_components=3, whiten="arbitrary-variance")
diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py
index 2840905f0f604..646669d1469ff 100644
--- a/examples/decomposition/plot_image_denoising.py
+++ b/examples/decomposition/plot_image_denoising.py
@@ -37,7 +37,6 @@
 # ------------------------

 import numpy as np
-
 try:  # Scipy >= 1.10
     from scipy.datasets import face
 except ImportError:
diff --git a/examples/decomposition/plot_incremental_pca.py b/examples/decomposition/plot_incremental_pca.py
index adc7f83f3cda0..8e5aeccfddc8a 100644
--- a/examples/decomposition/plot_incremental_pca.py
+++ b/examples/decomposition/plot_incremental_pca.py
@@ -22,8 +22,8 @@
 # Authors: Kyle Kastner
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.datasets import load_iris
 from sklearn.decomposition import PCA, IncrementalPCA
diff --git a/examples/decomposition/plot_pca_3d.py b/examples/decomposition/plot_pca_3d.py
index 692b9983ed55e..61ce5dde75c89 100644
--- a/examples/decomposition/plot_pca_3d.py
+++ b/examples/decomposition/plot_pca_3d.py
@@ -19,7 +19,6 @@
 # ---------------

 import numpy as np
-
 from scipy import stats

 e = np.exp(1)
@@ -52,13 +51,13 @@ def pdf(x):
 # Plot the figures
 # ----------------

-from sklearn.decomposition import PCA
-
 import matplotlib.pyplot as plt

 # unused but required import for doing 3d projections with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401

+from sklearn.decomposition import PCA
+

 def plot_figs(fig_num, elev, azim):
     fig = plt.figure(fig_num, figsize=(4, 3))
diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py
index 7c3e69580d298..d025ba34adc27 100644
--- a/examples/decomposition/plot_pca_iris.py
+++ b/examples/decomposition/plot_pca_iris.py
@@ -13,15 +13,13 @@
 # Code source: Gaël Varoquaux
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
-
-from sklearn import decomposition
-from sklearn import datasets
-
 # unused but required import for doing 3d projections with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401
+import numpy as np
+
+from sklearn import datasets, decomposition

 np.random.seed(5)
diff --git a/examples/decomposition/plot_pca_vs_fa_model_selection.py b/examples/decomposition/plot_pca_vs_fa_model_selection.py
index 4c934ab756c3e..e269fc6b5c278 100644
--- a/examples/decomposition/plot_pca_vs_fa_model_selection.py
+++ b/examples/decomposition/plot_pca_vs_fa_model_selection.py
@@ -34,7 +34,6 @@
 # ---------------

 import numpy as np
-
 from scipy import linalg

 n_samples, n_features, rank = 500, 25, 5
@@ -56,10 +55,9 @@
 import matplotlib.pyplot as plt

+from sklearn.covariance import LedoitWolf, ShrunkCovariance
 from sklearn.decomposition import PCA, FactorAnalysis
-from sklearn.covariance import ShrunkCovariance, LedoitWolf
-from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import GridSearchCV, cross_val_score

 n_components = np.arange(0, n_features, 5)  # options for n_components
diff --git a/examples/decomposition/plot_sparse_coding.py b/examples/decomposition/plot_sparse_coding.py
index 4f4602f1ff1ac..c45cd3c83b04f 100644
--- a/examples/decomposition/plot_sparse_coding.py
+++ b/examples/decomposition/plot_sparse_coding.py
@@ -16,8 +16,8 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.decomposition import SparseCoder
diff --git a/examples/decomposition/plot_varimax_fa.py b/examples/decomposition/plot_varimax_fa.py
index 6e50709620325..9d4c3b9ed1ee7 100644
--- a/examples/decomposition/plot_varimax_fa.py
+++ b/examples/decomposition/plot_varimax_fa.py
@@ -22,9 +22,9 @@
 import matplotlib.pyplot as plt
 import numpy as np

-from sklearn.decomposition import FactorAnalysis, PCA
-from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import load_iris
+from sklearn.decomposition import PCA, FactorAnalysis
+from sklearn.preprocessing import StandardScaler

 # %%
 # Load Iris data
diff --git a/examples/ensemble/plot_adaboost_hastie_10_2.py b/examples/ensemble/plot_adaboost_hastie_10_2.py
index 13d3a90d3b05c..313056286f6ba 100644
--- a/examples/ensemble/plot_adaboost_hastie_10_2.py
+++ b/examples/ensemble/plot_adaboost_hastie_10_2.py
@@ -94,6 +94,7 @@
 # added to the ensemble.

 import numpy as np
+
 from sklearn.metrics import zero_one_loss

 ada_discrete_err = np.zeros((n_estimators,))
diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py
index fae87b4a42d3d..f12aa8c75e213 100644
--- a/examples/ensemble/plot_adaboost_multiclass.py
+++ b/examples/ensemble/plot_adaboost_multiclass.py
@@ -35,7 +35,6 @@
 from sklearn.metrics import accuracy_score
 from sklearn.tree import DecisionTreeClassifier

-
 X, y = make_gaussian_quantiles(
     n_samples=13000, n_features=10, n_classes=3, random_state=1
 )
diff --git a/examples/ensemble/plot_adaboost_twoclass.py b/examples/ensemble/plot_adaboost_twoclass.py
index 19679c6285d3b..d1e89c47b7fcf 100644
--- a/examples/ensemble/plot_adaboost_twoclass.py
+++ b/examples/ensemble/plot_adaboost_twoclass.py
@@ -21,14 +21,13 @@
 #
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

-from sklearn.ensemble import AdaBoostClassifier
-from sklearn.tree import DecisionTreeClassifier
 from sklearn.datasets import make_gaussian_quantiles
+from sklearn.ensemble import AdaBoostClassifier
 from sklearn.inspection import DecisionBoundaryDisplay
-
+from sklearn.tree import DecisionTreeClassifier

 # Construct dataset
 X1, y1 = make_gaussian_quantiles(
diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py
index 4f57b90019e94..9239603115db1 100644
--- a/examples/ensemble/plot_bias_variance.py
+++ b/examples/ensemble/plot_bias_variance.py
@@ -66,8 +66,8 @@
 # Author: Gilles Louppe
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.ensemble import BaggingRegressor
 from sklearn.tree import DecisionTreeRegressor
diff --git a/examples/ensemble/plot_ensemble_oob.py b/examples/ensemble/plot_ensemble_oob.py
index bd678af42a7d1..972ca1f6259aa 100644
--- a/examples/ensemble/plot_ensemble_oob.py
+++ b/examples/ensemble/plot_ensemble_oob.py
@@ -26,9 +26,10 @@
 #
 # License: BSD 3 Clause

+from collections import OrderedDict
+
 import matplotlib.pyplot as plt

-from collections import OrderedDict
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py
index 36eb87bb757cd..8a17dd9d74194 100644
--- a/examples/ensemble/plot_feature_transformation.py
+++ b/examples/ensemble/plot_feature_transformation.py
@@ -59,7 +59,7 @@
 # First, we will start by training the random forest and gradient boosting on
 # the separated training set

-from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

 random_forest = RandomForestClassifier(
     n_estimators=n_estimators, max_depth=max_depth, random_state=10
@@ -105,8 +105,7 @@
 # method `apply`. The pipeline in scikit-learn expects a call to `transform`.
 # Therefore, we wrapped the call to `apply` within a `FunctionTransformer`.
-from sklearn.preprocessing import FunctionTransformer
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import FunctionTransformer, OneHotEncoder


 def rf_apply(X, model):
@@ -143,6 +142,7 @@ def gbdt_apply(X, model):
 # We can finally show the different ROC curves for all the models.

 import matplotlib.pyplot as plt
+
 from sklearn.metrics import RocCurveDisplay

 fig, ax = plt.subplots()
diff --git a/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py b/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py
index b4a1993471474..cc48e47e9ad34 100644
--- a/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py
+++ b/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py
@@ -78,8 +78,8 @@
 # here to keep the example simple.

 import pandas as pd
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.ensemble import RandomForestRegressor
+
+from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor
 from sklearn.model_selection import GridSearchCV, KFold

 models = {
@@ -123,8 +123,8 @@
 # Error bars correspond to one standard deviation as computed in the different
 # folds of the cross-validation.

-import plotly.express as px
 import plotly.colors as colors
+import plotly.express as px
 from plotly.subplots import make_subplots

 fig = make_subplots(
diff --git a/examples/ensemble/plot_forest_importances.py b/examples/ensemble/plot_forest_importances.py
index fbda63b26faee..269451168dd7a 100644
--- a/examples/ensemble/plot_forest_importances.py
+++ b/examples/ensemble/plot_forest_importances.py
@@ -57,6 +57,7 @@
 # cardinality** features (many unique values). See
 # :ref:`permutation_importance` as an alternative below.

 import time
+
 import numpy as np

 start_time = time.time()
diff --git a/examples/ensemble/plot_forest_importances_faces.py b/examples/ensemble/plot_forest_importances_faces.py
index 3848873c297de..8b8e8751ec5a2 100644
--- a/examples/ensemble/plot_forest_importances_faces.py
+++ b/examples/ensemble/plot_forest_importances_faces.py
@@ -59,6 +59,7 @@
 # cardinality** features (many unique values). See
 # :ref:`permutation_importance` as an alternative.

 import time
+
 import matplotlib.pyplot as plt

 start_time = time.time()
diff --git a/examples/ensemble/plot_forest_iris.py b/examples/ensemble/plot_forest_iris.py
index ee414db7125dc..6aaceea88efd2 100644
--- a/examples/ensemble/plot_forest_iris.py
+++ b/examples/ensemble/plot_forest_iris.py
@@ -42,15 +42,15 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.colors import ListedColormap

 from sklearn.datasets import load_iris
 from sklearn.ensemble import (
-    RandomForestClassifier,
-    ExtraTreesClassifier,
     AdaBoostClassifier,
+    ExtraTreesClassifier,
+    RandomForestClassifier,
 )
 from sklearn.tree import DecisionTreeClassifier
diff --git a/examples/ensemble/plot_gradient_boosting_categorical.py b/examples/ensemble/plot_gradient_boosting_categorical.py
index fa4b68be9cbb7..0dd0a84243b4d 100644
--- a/examples/ensemble/plot_gradient_boosting_categorical.py
+++ b/examples/ensemble/plot_gradient_boosting_categorical.py
@@ -77,10 +77,9 @@
 # As a baseline, we create an estimator where the categorical features are
 # dropped:

+from sklearn.compose import make_column_selector, make_column_transformer
 from sklearn.ensemble import HistGradientBoostingRegressor
 from sklearn.pipeline import make_pipeline
-from sklearn.compose import make_column_transformer
-from sklearn.compose import make_column_selector

 dropper = make_column_transformer(
     ("drop", make_column_selector(dtype_include="category")), remainder="passthrough"
@@ -114,9 +113,10 @@
 # were ordered quantities, i.e. the categories will be encoded as 0, 1, 2,
 # etc., and treated as continuous features.

-from sklearn.preprocessing import OrdinalEncoder
 import numpy as np

+from sklearn.preprocessing import OrdinalEncoder
+
 ordinal_encoder = make_column_transformer(
     (
         OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan),
@@ -166,9 +166,10 @@
 # models performance in terms of
 # :func:`~metrics.mean_absolute_percentage_error` and fit times.

-from sklearn.model_selection import cross_validate
 import matplotlib.pyplot as plt

+from sklearn.model_selection import cross_validate
+
 scoring = "neg_mean_absolute_percentage_error"
 n_cv_folds = 3
diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py
index 6f1013eed9564..f271f80a07c55 100644
--- a/examples/ensemble/plot_gradient_boosting_early_stopping.py
+++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -38,11 +38,10 @@
 import time

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

-from sklearn import ensemble
-from sklearn import datasets
+from sklearn import datasets, ensemble
 from sklearn.model_selection import train_test_split

 data_list = [
diff --git a/examples/ensemble/plot_gradient_boosting_oob.py b/examples/ensemble/plot_gradient_boosting_oob.py
index dd7f19a1fe245..0cb40ad2c11ea 100644
--- a/examples/ensemble/plot_gradient_boosting_oob.py
+++ b/examples/ensemble/plot_gradient_boosting_oob.py
@@ -26,15 +26,13 @@
 #
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+from scipy.special import expit

 from sklearn import ensemble
-from sklearn.model_selection import KFold
-from sklearn.model_selection import train_test_split
 from sklearn.metrics import log_loss
-
-from scipy.special import expit
+from sklearn.model_selection import KFold, train_test_split

 # Generate data (adapted from G. Ridgeway's gbm example)
 n_samples = 1000
diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index 2aa04c3988d9e..36a78dfeb94ca 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -12,6 +12,7 @@
 # Generate some data for a synthetic regression problem by applying the
 # function f to uniformly sampled random inputs.

 import numpy as np
+
 from sklearn.model_selection import train_test_split

@@ -58,7 +59,6 @@ def f(x):
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.metrics import mean_pinball_loss, mean_squared_error

-
 all_models = {}
 common_params = dict(
     learning_rate=0.05,
@@ -93,7 +93,6 @@ def f(x):
 # 90% interval (from 5th to 95th conditional percentiles).
 import matplotlib.pyplot as plt

-
 y_pred = all_models["mse"].predict(xx)
 y_lower = all_models["q 0.05"].predict(xx)
 y_upper = all_models["q 0.95"].predict(xx)
diff --git a/examples/ensemble/plot_gradient_boosting_regression.py b/examples/ensemble/plot_gradient_boosting_regression.py
index 3e378e8af7203..94705ccfeca24 100644
--- a/examples/ensemble/plot_gradient_boosting_regression.py
+++ b/examples/ensemble/plot_gradient_boosting_regression.py
@@ -23,6 +23,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn import datasets, ensemble
 from sklearn.inspection import permutation_importance
 from sklearn.metrics import mean_squared_error
diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py
index a4ac69a822b92..218d69d5ac7d7 100644
--- a/examples/ensemble/plot_gradient_boosting_regularization.py
+++ b/examples/ensemble/plot_gradient_boosting_regularization.py
@@ -25,11 +25,10 @@
 #
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

-from sklearn import ensemble
-from sklearn import datasets
+from sklearn import datasets, ensemble
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
diff --git a/examples/ensemble/plot_isolation_forest.py b/examples/ensemble/plot_isolation_forest.py
index aeabb60203ac6..f5fad1d7b9ea9 100644
--- a/examples/ensemble/plot_isolation_forest.py
+++ b/examples/ensemble/plot_isolation_forest.py
@@ -31,6 +31,7 @@
 # the label `-1`.

 import numpy as np
+
 from sklearn.model_selection import train_test_split

 n_samples, n_outliers = 120, 40
@@ -78,6 +79,7 @@
 # or not. The scatter plot displays the true labels.

 import matplotlib.pyplot as plt
+
 from sklearn.inspection import DecisionBoundaryDisplay

 disp = DecisionBoundaryDisplay.from_estimator(
diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py
index b1f7ca8ed24ed..15ad8e9524243 100644
--- a/examples/ensemble/plot_monotonic_constraints.py
+++ b/examples/ensemble/plot_monotonic_constraints.py
@@ -20,11 +20,11 @@
 """

 # %%
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.inspection import PartialDependenceDisplay
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.inspection import PartialDependenceDisplay

 rng = np.random.RandomState(0)
diff --git a/examples/ensemble/plot_random_forest_embedding.py b/examples/ensemble/plot_random_forest_embedding.py
index 000b83e67b92a..fe26e04ca7789 100644
--- a/examples/ensemble/plot_random_forest_embedding.py
+++ b/examples/ensemble/plot_random_forest_embedding.py
@@ -26,12 +26,12 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.datasets import make_circles
-from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
 from sklearn.decomposition import TruncatedSVD
+from sklearn.ensemble import ExtraTreesClassifier, RandomTreesEmbedding
 from sklearn.naive_bayes import BernoulliNB

 # make a synthetic dataset
diff --git a/examples/ensemble/plot_random_forest_regression_multioutput.py b/examples/ensemble/plot_random_forest_regression_multioutput.py
index 4b3d4f4a9a728..ce8346c329127 100644
--- a/examples/ensemble/plot_random_forest_regression_multioutput.py
+++ b/examples/ensemble/plot_random_forest_regression_multioutput.py
@@ -25,13 +25,13 @@
 #
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import train_test_split
 from sklearn.multioutput import MultiOutputRegressor

-
 # Create a random dataset
 rng = np.random.RandomState(1)
 X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
diff --git a/examples/ensemble/plot_stack_predictors.py b/examples/ensemble/plot_stack_predictors.py
index 56a82ded5b725..aac7ccc8a3ef8 100644
--- a/examples/ensemble/plot_stack_predictors.py
+++ b/examples/ensemble/plot_stack_predictors.py
@@ -131,8 +131,7 @@ def load_ames_housing():
 # Then, we will now define the preprocessor used when the ending regressor
 # is a linear model.
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import OneHotEncoder, StandardScaler

 cat_linear_processor = OneHotEncoder(handle_unknown="ignore")
 num_linear_processor = make_pipeline(
@@ -206,9 +205,11 @@ def load_ames_housing():
 import time
+
 import matplotlib.pyplot as plt
+
 from sklearn.metrics import PredictionErrorDisplay
-from sklearn.model_selection import cross_validate, cross_val_predict
+from sklearn.model_selection import cross_val_predict, cross_validate

 fig, axs = plt.subplots(2, 2, figsize=(9, 7))
 axs = np.ravel(axs)
diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index e6dc68eeadf98..90441c6d28339 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -28,11 +28,11 @@
 import matplotlib.pyplot as plt

 from sklearn import datasets
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.svm import SVC
 from sklearn.ensemble import VotingClassifier
 from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier

 # Loading some example data
 iris = datasets.load_iris()
diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py
index 54c290c3073e0..14f4f4330c045 100644
--- a/examples/ensemble/plot_voting_probas.py
+++ b/examples/ensemble/plot_voting_probas.py
@@ -23,13 +23,12 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

+from sklearn.ensemble import RandomForestClassifier, VotingClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.naive_bayes import GaussianNB
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import VotingClassifier

 clf1 = LogisticRegression(max_iter=1000, random_state=123)
 clf2 = RandomForestClassifier(n_estimators=100, random_state=123)
diff --git a/examples/ensemble/plot_voting_regressor.py b/examples/ensemble/plot_voting_regressor.py
index 23e709cc9e62a..d33becca505e3 100644
--- a/examples/ensemble/plot_voting_regressor.py
+++ b/examples/ensemble/plot_voting_regressor.py
@@ -26,10 +26,12 @@
 import matplotlib.pyplot as plt

 from sklearn.datasets import load_diabetes
-from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import (
+    GradientBoostingRegressor,
+    RandomForestRegressor,
+    VotingRegressor,
+)
 from sklearn.linear_model import LinearRegression
-from sklearn.ensemble import VotingRegressor

 # %%
 # Training classifiers
diff --git a/examples/exercises/plot_cv_digits.py b/examples/exercises/plot_cv_digits.py
index e43bbd86bb027..ebad3a55098b5 100644
--- a/examples/exercises/plot_cv_digits.py
+++ b/examples/exercises/plot_cv_digits.py
@@ -11,8 +11,9 @@
 """

 import numpy as np
-from sklearn.model_selection import cross_val_score
+
 from sklearn import datasets, svm
+from sklearn.model_selection import cross_val_score

 X, y = datasets.load_digits(return_X_y=True)
diff --git a/examples/exercises/plot_digits_classification_exercise.py b/examples/exercises/plot_digits_classification_exercise.py
index 877e615659743..25b0171c66421 100644
--- a/examples/exercises/plot_digits_classification_exercise.py
+++ b/examples/exercises/plot_digits_classification_exercise.py
@@ -12,7 +12,7 @@
 """

-from sklearn import datasets, neighbors, linear_model
+from sklearn import datasets, linear_model, neighbors

 X_digits, y_digits = datasets.load_digits(return_X_y=True)
 X_digits = X_digits / X_digits.max()
diff --git a/examples/exercises/plot_iris_exercise.py b/examples/exercises/plot_iris_exercise.py
index 74da8c27889c9..07687b920e1b8 100644
--- a/examples/exercises/plot_iris_exercise.py
+++ b/examples/exercises/plot_iris_exercise.py
@@ -10,8 +10,9 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets, svm

 iris = datasets.load_iris()
diff --git a/examples/feature_selection/plot_f_test_vs_mi.py b/examples/feature_selection/plot_f_test_vs_mi.py
index ba82625a7cfaf..5c015e7e4fd58 100644
--- a/examples/feature_selection/plot_f_test_vs_mi.py
+++ b/examples/feature_selection/plot_f_test_vs_mi.py
@@ -23,8 +23,9 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.feature_selection import f_regression, mutual_info_regression

 np.random.seed(0)
diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py
index ce2bad8626a79..c57a2d5d6b6f9 100644
--- a/examples/feature_selection/plot_feature_selection.py
+++ b/examples/feature_selection/plot_feature_selection.py
@@ -21,6 +21,7 @@
 # --------------------
 #
 import numpy as np
+
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
diff --git a/examples/feature_selection/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py
index 9684f5fabd383..553f38f9c674f 100644
--- a/examples/feature_selection/plot_rfe_digits.py
+++ b/examples/feature_selection/plot_rfe_digits.py
@@ -12,10 +12,11 @@
 """  # noqa: E501

-from sklearn.svm import SVC
+import matplotlib.pyplot as plt
+
 from sklearn.datasets import load_digits
 from sklearn.feature_selection import RFE
-import matplotlib.pyplot as plt
+from sklearn.svm import SVC

 # Load the digits dataset
 digits = load_digits()
diff --git a/examples/feature_selection/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py
index 2d52ea5a3fdf3..693e21fe21787 100644
--- a/examples/feature_selection/plot_rfe_with_cross_validation.py
+++ b/examples/feature_selection/plot_rfe_with_cross_validation.py
@@ -39,8 +39,8 @@
 # strategy "accuracy" optimizes the proportion of correctly classified samples.

 from sklearn.feature_selection import RFECV
-from sklearn.model_selection import StratifiedKFold
 from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import StratifiedKFold

 min_features_to_select = 1  # Minimum number of features to consider
 clf = LogisticRegression()
diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py
index 6e8a6434e1079..38276efcbd770 100644
--- a/examples/feature_selection/plot_select_from_model_diabetes.py
+++ b/examples/feature_selection/plot_select_from_model_diabetes.py
@@ -46,6 +46,7 @@
 # :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`.  # noqa: E501
 import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn.linear_model import RidgeCV

 ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5)).fit(X, y)
@@ -67,9 +68,10 @@
 #
 # Since we want to select only 2 features, we will set this threshold slightly
 # above the coefficient of third most important feature.
-from sklearn.feature_selection import SelectFromModel
 from time import time

+from sklearn.feature_selection import SelectFromModel
+
 threshold = np.sort(importance)[-3] + 0.01

 tic = time()
diff --git a/examples/gaussian_process/plot_compare_gpr_krr.py b/examples/gaussian_process/plot_compare_gpr_krr.py
index 7a58ba437278f..8379baf148256 100644
--- a/examples/gaussian_process/plot_compare_gpr_krr.py
+++ b/examples/gaussian_process/plot_compare_gpr_krr.py
@@ -125,6 +125,7 @@
 #
 # Thus, let's use such a :class:`~sklearn.kernel_ridge.KernelRidge`.
 import time
+
 from sklearn.gaussian_process.kernels import ExpSineSquared
 from sklearn.kernel_ridge import KernelRidge

@@ -176,9 +177,10 @@
 # parameter and the kernel parameters.

 # %%
-from sklearn.model_selection import RandomizedSearchCV
 from scipy.stats import loguniform

+from sklearn.model_selection import RandomizedSearchCV
+
 param_distributions = {
     "alpha": loguniform(1e0, 1e3),
     "kernel__length_scale": loguniform(1e-2, 1e2),
diff --git a/examples/gaussian_process/plot_gpc.py b/examples/gaussian_process/plot_gpc.py
index e2d78fa23f09e..21a99065e06ce 100644
--- a/examples/gaussian_process/plot_gpc.py
+++ b/examples/gaussian_process/plot_gpc.py
@@ -27,13 +27,11 @@
 # License: BSD 3 clause

 import numpy as np
-
 from matplotlib import pyplot as plt

-from sklearn.metrics import accuracy_score, log_loss
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF
-
+from sklearn.metrics import accuracy_score, log_loss

 # Generate data
 train_size = 50
diff --git a/examples/gaussian_process/plot_gpc_iris.py b/examples/gaussian_process/plot_gpc_iris.py
index ce0ed066a1377..88c536d8824c8 100644
--- a/examples/gaussian_process/plot_gpc_iris.py
+++ b/examples/gaussian_process/plot_gpc_iris.py
@@ -10,8 +10,9 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF
diff --git a/examples/gaussian_process/plot_gpc_isoprobability.py b/examples/gaussian_process/plot_gpc_isoprobability.py
index cc036244bc17a..a986d285632b7 100644
--- a/examples/gaussian_process/plot_gpc_isoprobability.py
+++ b/examples/gaussian_process/plot_gpc_isoprobability.py
@@ -14,12 +14,12 @@
 # License: BSD 3 clause

 import numpy as np
-
-from matplotlib import pyplot as plt
 from matplotlib import cm
+from matplotlib import pyplot as plt

 from sklearn.gaussian_process import GaussianProcessClassifier
-from sklearn.gaussian_process.kernels import DotProduct, ConstantKernel as C
+from sklearn.gaussian_process.kernels import ConstantKernel as C
+from sklearn.gaussian_process.kernels import DotProduct

 # A few constants
 lim = 8
diff --git a/examples/gaussian_process/plot_gpc_xor.py b/examples/gaussian_process/plot_gpc_xor.py
index 6e6217dba8b9e..4439a5ee722b6 100644
--- a/examples/gaussian_process/plot_gpc_xor.py
+++ b/examples/gaussian_process/plot_gpc_xor.py
@@ -15,13 +15,12 @@
 #
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF, DotProduct

-
 xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50))
 rng = np.random.RandomState(0)
 X = rng.randn(200, 2)
diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py
index bfc1c21631b26..a3acd1dbfcbd3 100644
--- a/examples/gaussian_process/plot_gpr_co2.py
+++ b/examples/gaussian_process/plot_gpr_co2.py
@@ -172,6 +172,7 @@
 # Thus, we create synthetic data from 1958 to the current month. In addition,
 # we need to add the subtracted mean computed during training.
 import datetime
+
 import numpy as np

 today = datetime.datetime.now()
diff --git a/examples/gaussian_process/plot_gpr_on_structured_data.py b/examples/gaussian_process/plot_gpr_on_structured_data.py
index ada50a0edf06b..e702f1fe0769a 100644
--- a/examples/gaussian_process/plot_gpr_on_structured_data.py
+++ b/examples/gaussian_process/plot_gpr_on_structured_data.py
@@ -40,11 +40,10 @@
 # %%
 import numpy as np

-from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
-from sklearn.gaussian_process.kernels import GenericKernelMixin
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process import GaussianProcessClassifier
+
 from sklearn.base import clone
+from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import GenericKernelMixin, Hyperparameter, Kernel


 class SequenceKernel(GenericKernelMixin, Kernel):
diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py
index d83922817e5de..9dc8b6c831710 100644
--- a/examples/impute/plot_iterative_imputer_variants_comparison.py
+++ b/examples/impute/plot_iterative_imputer_variants_comparison.py
@@ -44,21 +44,21 @@
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd

+from sklearn.datasets import fetch_california_housing
+from sklearn.ensemble import RandomForestRegressor
+
 # To use this experimental feature, we need to explicitly ask for it:
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.datasets import fetch_california_housing
-from sklearn.impute import SimpleImputer
-from sklearn.impute import IterativeImputer
-from sklearn.linear_model import BayesianRidge, Ridge
+from sklearn.impute import IterativeImputer, SimpleImputer
 from sklearn.kernel_approximation import Nystroem
-from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import BayesianRidge, Ridge
+from sklearn.model_selection import cross_val_score
 from sklearn.neighbors import KNeighborsRegressor
 from sklearn.pipeline import make_pipeline
-from sklearn.model_selection import cross_val_score

 N_SPLITS = 5
diff --git a/examples/impute/plot_missing_values.py b/examples/impute/plot_missing_values.py
index f6350ad2544dd..4b9f8ae079d8a 100644
--- a/examples/impute/plot_missing_values.py
+++ b/examples/impute/plot_missing_values.py
@@ -44,9 +44,7 @@
 import numpy as np

-from sklearn.datasets import fetch_california_housing
-from sklearn.datasets import load_diabetes
-
+from sklearn.datasets import fetch_california_housing, load_diabetes

 rng = np.random.RandomState(42)
@@ -95,11 +93,10 @@
 # To use the experimental IterativeImputer, we need to explicitly ask for it:
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
+from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer
 from sklearn.model_selection import cross_val_score
 from sklearn.pipeline import make_pipeline
-
 N_SPLITS = 4

 regressor = RandomForestRegressor(random_state=0)
@@ -260,7 +257,6 @@ def get_impute_iterative(X_missing, y_missing):
 import matplotlib.pyplot
as plt - n_bars = len(mses_diabetes) xval = np.arange(n_bars) diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py index d978ee860636c..eb935ee41ae67 100644 --- a/examples/inspection/plot_linear_model_coefficient_interpretation.py +++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py @@ -40,10 +40,10 @@ """ # %% +import matplotlib.pyplot as plt import numpy as np -import scipy as sp import pandas as pd -import matplotlib.pyplot as plt +import scipy as sp import seaborn as sns # %% @@ -53,7 +53,6 @@ # We fetch the data from `OpenML `_. # Note that setting the parameter `as_frame` to True will retrieve the data # as a pandas dataframe. - from sklearn.datasets import fetch_openml survey = fetch_openml(data_id=534, as_frame=True, parser="pandas") @@ -154,9 +153,9 @@ # To describe the dataset as a linear model we use a ridge regressor # with a very small regularization to model the logarithm of the WAGE. -from sklearn.pipeline import make_pipeline -from sklearn.linear_model import Ridge from sklearn.compose import TransformedTargetRegressor +from sklearn.linear_model import Ridge +from sklearn.pipeline import make_pipeline model = make_pipeline( preprocessor, @@ -178,8 +177,7 @@ # on the test set and computing, # for example, the median absolute error of the model. -from sklearn.metrics import median_absolute_error -from sklearn.metrics import PredictionErrorDisplay +from sklearn.metrics import PredictionErrorDisplay, median_absolute_error mae_train = median_absolute_error(y_train, model.predict(X_train)) y_pred = model.predict(X_test) @@ -319,8 +317,7 @@ # their robustness is not guaranteed, and they should probably be interpreted # with caution. -from sklearn.model_selection import cross_validate -from sklearn.model_selection import RepeatedKFold +from sklearn.model_selection import RepeatedKFold, cross_validate cv = RepeatedKFold(n_splits=5, n_repeats=5, random_state=0) cv_model = cross_validate( diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index 43404b356d829..ed7a656da9926 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -100,8 +100,9 @@ # We plot the average number of bike rentals by grouping the data by season and # by year. from itertools import product -import numpy as np + import matplotlib.pyplot as plt +import numpy as np days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat") hours = tuple(range(24)) @@ -157,8 +158,7 @@ # numerical features and encode the categorical features with a # :class:`~sklearn.preprocessing.OneHotEncoder`. from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import QuantileTransformer -from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import OneHotEncoder, QuantileTransformer mlp_preprocessor = ColumnTransformer( transformers=[ @@ -203,6 +203,7 @@ # Let's fit a :class:`~sklearn.neural_network.MLPRegressor` and compute # single-variable partial dependence plots. from time import time + from sklearn.neural_network import MLPRegressor from sklearn.pipeline import make_pipeline @@ -242,6 +243,7 @@ # # We will plot the averaged partial dependence.
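A hedged, self-contained sketch of the Display call used next; the estimator and data here are stand-ins, not the bike-sharing pipeline:

    from sklearn.datasets import make_regression
    from sklearn.ensemble import HistGradientBoostingRegressor
    from sklearn.inspection import PartialDependenceDisplay

    X, y = make_regression(n_samples=300, n_features=4, random_state=0)
    est = HistGradientBoostingRegressor(random_state=0).fit(X, y)
    # kind="average" draws the classic (averaged) partial dependence curve
    PartialDependenceDisplay.from_estimator(est, X, features=[0, 1], kind="average")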
import matplotlib.pyplot as plt + from sklearn.inspection import PartialDependenceDisplay common_params = { @@ -529,10 +531,9 @@ # # Let's make the same partial dependence plot for the 2 features interaction, # this time in 3 dimensions. -import numpy as np - # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +import numpy as np from sklearn.inspection import partial_dependence diff --git a/examples/inspection/plot_permutation_importance.py b/examples/inspection/plot_permutation_importance.py index cf0907ce3fd37..789506e892e3a 100644 --- a/examples/inspection/plot_permutation_importance.py +++ b/examples/inspection/plot_permutation_importance.py @@ -64,9 +64,9 @@ # categorical features; # - use :class:`~sklearn.impute.SimpleImputer` to fill missing values for # numerical features using a mean strategy. +from sklearn.compose import ColumnTransformer from sklearn.ensemble import RandomForestClassifier from sklearn.impute import SimpleImputer -from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from sklearn.preprocessing import OrdinalEncoder diff --git a/examples/inspection/plot_permutation_importance_multicollinear.py b/examples/inspection/plot_permutation_importance_multicollinear.py index 59871c00946a6..e14916e808af9 100644 --- a/examples/inspection/plot_permutation_importance_multicollinear.py +++ b/examples/inspection/plot_permutation_importance_multicollinear.py @@ -22,9 +22,9 @@ import matplotlib.pyplot as plt import numpy as np -from scipy.stats import spearmanr from scipy.cluster import hierarchy from scipy.spatial.distance import squareform +from scipy.stats import spearmanr from sklearn.datasets import load_breast_cancer from sklearn.ensemble import RandomForestClassifier diff --git a/examples/kernel_approximation/plot_scalable_poly_kernels.py b/examples/kernel_approximation/plot_scalable_poly_kernels.py index 1a46e4bc2aa9c..8e5854bd8500c 100644 --- a/examples/kernel_approximation/plot_scalable_poly_kernels.py +++ b/examples/kernel_approximation/plot_scalable_poly_kernels.py @@ -64,8 +64,8 @@ # the LIBSVM webpage, and then normalize to unit length as done in the # original Tensor Sketch paper [1]. -from sklearn.preprocessing import MinMaxScaler, Normalizer from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import MinMaxScaler, Normalizer mm = make_pipeline(MinMaxScaler(), Normalizer()) X_train = mm.fit_transform(X_train) @@ -80,6 +80,7 @@ # plot them later. import time + from sklearn.svm import LinearSVC results = {} diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py index 261fec8aeee3b..79b49fb76ef9a 100644 --- a/examples/linear_model/plot_ard.py +++ b/examples/linear_model/plot_ard.py @@ -54,7 +54,8 @@ # coefficients. 
import pandas as pd -from sklearn.linear_model import ARDRegression, LinearRegression, BayesianRidge + +from sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression olr = LinearRegression().fit(X, y) brr = BayesianRidge(compute_score=True, n_iter=30).fit(X, y) diff --git a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py index 3bca3101758ff..8313b0b56922e 100644 --- a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py +++ b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py @@ -20,6 +20,7 @@ # %% # Let's start by loading the dataset and creating some sample weights. import numpy as np + from sklearn.datasets import make_regression rng = np.random.RandomState(0) diff --git a/examples/linear_model/plot_huber_vs_ridge.py b/examples/linear_model/plot_huber_vs_ridge.py index 2ea5a190e35d8..7c0222b71a721 100644 --- a/examples/linear_model/plot_huber_vs_ridge.py +++ b/examples/linear_model/plot_huber_vs_ridge.py @@ -16,8 +16,8 @@ # Authors: Manoj Kumar mks542@nyu.edu # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import make_regression from sklearn.linear_model import HuberRegressor, Ridge diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py index faf547c783609..b1e4d76c7f221 100644 --- a/examples/linear_model/plot_iris_logistic.py +++ b/examples/linear_model/plot_iris_logistic.py @@ -15,9 +15,10 @@ # License: BSD 3 clause import matplotlib.pyplot as plt -from sklearn.linear_model import LogisticRegression + from sklearn import datasets from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import LogisticRegression # import some data to play with iris = datasets.load_iris() diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py index b08837304730a..075d8a50d2f62 100644 --- a/examples/linear_model/plot_lasso_and_elasticnet.py +++ b/examples/linear_model/plot_lasso_and_elasticnet.py @@ -112,9 +112,10 @@ # :class:`~sklearn.model_selection.TimeSeriesSplit` cross-validation strategy to a # :class:`~sklearn.linear_model.LassoCV`. To keep the example simple and fast to # execute, we directly set the optimal value for alpha here. +from time import time + from sklearn.linear_model import Lasso from sklearn.metrics import r2_score -from time import time t0 = time() lasso = Lasso(alpha=0.14).fit(X_train, y_train) @@ -181,8 +182,8 @@ # and estimated coefficients of the respective linear models. 
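A small sketch of that comparison on synthetic data; the alphas and sizes below are illustrative, not the example's tuned values:

    import pandas as pd
    from sklearn.datasets import make_regression
    from sklearn.linear_model import ElasticNet, Lasso

    X, y, true_coef = make_regression(
        n_samples=200, n_features=10, n_informative=3, coef=True, random_state=0
    )
    df = pd.DataFrame(
        {
            "true": true_coef,
            "lasso": Lasso(alpha=1.0).fit(X, y).coef_,
            "enet": ElasticNet(alpha=1.0, l1_ratio=0.5).fit(X, y).coef_,
        }
    )
    print(df.round(2))  # one row per feature, one column per model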
import matplotlib.pyplot as plt -import seaborn as sns import pandas as pd +import seaborn as sns from matplotlib.colors import SymLogNorm df = pd.DataFrame( diff --git a/examples/linear_model/plot_lasso_coordinate_descent_path.py b/examples/linear_model/plot_lasso_coordinate_descent_path.py index 1796dc5011644..ee2f09f000d23 100644 --- a/examples/linear_model/plot_lasso_coordinate_descent_path.py +++ b/examples/linear_model/plot_lasso_coordinate_descent_path.py @@ -14,12 +14,12 @@ # License: BSD 3 clause from itertools import cycle -import numpy as np + import matplotlib.pyplot as plt +import numpy as np -from sklearn.linear_model import lasso_path, enet_path from sklearn import datasets - +from sklearn.linear_model import enet_path, lasso_path X, y = datasets.load_diabetes(return_X_y=True) diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py index 8da1820c0b0c4..a797d5d708160 100644 --- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py +++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py @@ -9,13 +9,12 @@ """ from time import time -from scipy import sparse -from scipy import linalg + +from scipy import linalg, sparse from sklearn.datasets import make_regression from sklearn.linear_model import Lasso - # %% # Comparing the two Lasso implementations on Dense data # ----------------------------------------------------- diff --git a/examples/linear_model/plot_lasso_lars.py b/examples/linear_model/plot_lasso_lars.py index 6788b8b1d1598..5444aeec90c65 100644 --- a/examples/linear_model/plot_lasso_lars.py +++ b/examples/linear_model/plot_lasso_lars.py @@ -14,11 +14,10 @@ # Alexandre Gramfort # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import linear_model -from sklearn import datasets +from sklearn import datasets, linear_model X, y = datasets.load_diabetes(return_X_y=True) diff --git a/examples/linear_model/plot_lasso_lars_ic.py b/examples/linear_model/plot_lasso_lars_ic.py index 95c0d0d66608d..8b265130f2f10 100644 --- a/examples/linear_model/plot_lasso_lars_ic.py +++ b/examples/linear_model/plot_lasso_lars_ic.py @@ -45,9 +45,9 @@ # # In the following, we are going to fit two models to compare the values # reported by AIC and BIC. -from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LassoLarsIC from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y) diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py index 7735f01987aa9..169d85ed81644 100644 --- a/examples/linear_model/plot_lasso_model_selection.py +++ b/examples/linear_model/plot_lasso_model_selection.py @@ -59,9 +59,10 @@ # # We will first fit a Lasso model with the AIC criterion. 
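For reference, a minimal sketch (assuming diabetes-like data) showing where the selected regularization strength ends up after the fit:

    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import LassoLarsIC
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X, y = load_diabetes(return_X_y=True)
    model = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y)
    print(model[-1].alpha_)  # the alpha that minimizes the AIC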
import time -from sklearn.preprocessing import StandardScaler + from sklearn.linear_model import LassoLarsIC from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler start_time = time.time() lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y) diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py index 801c893e5e28e..6ed3c86e8c27b 100644 --- a/examples/linear_model/plot_logistic.py +++ b/examples/linear_model/plot_logistic.py @@ -15,6 +15,7 @@ import matplotlib.pyplot as plt import numpy as np from scipy.special import expit + from sklearn.linear_model import LinearRegression, LogisticRegression # Generate a toy dataset, it's just a straight line with some Gaussian noise: diff --git a/examples/linear_model/plot_logistic_l1_l2_sparsity.py b/examples/linear_model/plot_logistic_l1_l2_sparsity.py index e8f5a2d51b637..80374d3833151 100644 --- a/examples/linear_model/plot_logistic_l1_l2_sparsity.py +++ b/examples/linear_model/plot_logistic_l1_l2_sparsity.py @@ -20,11 +20,11 @@ # Andreas Mueller # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.linear_model import LogisticRegression from sklearn import datasets +from sklearn.linear_model import LogisticRegression from sklearn.preprocessing import StandardScaler X, y = datasets.load_digits(return_X_y=True) diff --git a/examples/linear_model/plot_logistic_multinomial.py b/examples/linear_model/plot_logistic_multinomial.py index 814eeadaa68c4..791a788b2238b 100644 --- a/examples/linear_model/plot_logistic_multinomial.py +++ b/examples/linear_model/plot_logistic_multinomial.py @@ -12,11 +12,12 @@ # Authors: Tom Dupre la Tour # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_blobs -from sklearn.linear_model import LogisticRegression from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import LogisticRegression # make 3-class dataset for classification centers = [[-5, 0], [0, 1.5], [5, -1]] diff --git a/examples/linear_model/plot_multi_task_lasso_support.py b/examples/linear_model/plot_multi_task_lasso_support.py index a30b51ed7a7fe..9b6ea64ce4d85 100644 --- a/examples/linear_model/plot_multi_task_lasso_support.py +++ b/examples/linear_model/plot_multi_task_lasso_support.py @@ -39,7 +39,7 @@ # Fit models # ---------- -from sklearn.linear_model import MultiTaskLasso, Lasso +from sklearn.linear_model import Lasso, MultiTaskLasso coef_lasso_ = np.array([Lasso(alpha=0.5).fit(X, y).coef_ for y in Y.T]) coef_multi_task_lasso_ = MultiTaskLasso(alpha=1.0).fit(X, Y).coef_ diff --git a/examples/linear_model/plot_nnls.py b/examples/linear_model/plot_nnls.py index c8ba2914d783a..05a8550ec166b 100644 --- a/examples/linear_model/plot_nnls.py +++ b/examples/linear_model/plot_nnls.py @@ -9,8 +9,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.metrics import r2_score # %% diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py index 0618f545306db..244bd86387474 100644 --- a/examples/linear_model/plot_ols.py +++ b/examples/linear_model/plot_ols.py @@ -19,6 +19,7 @@ import matplotlib.pyplot as plt import numpy as np + from sklearn import datasets, linear_model from sklearn.metrics import mean_squared_error, r2_score diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py index 
7288cc9ae6594..0c95d483f1bf3 100644 --- a/examples/linear_model/plot_ols_3d.py +++ b/examples/linear_model/plot_ols_3d.py @@ -16,9 +16,10 @@ # %% # First we load the diabetes dataset. -from sklearn import datasets import numpy as np +from sklearn import datasets + X, y = datasets.load_diabetes(return_X_y=True) indices = (0, 1) diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py index b02ab193842d4..a03d9c253c1cf 100644 --- a/examples/linear_model/plot_ols_ridge_variance.py +++ b/examples/linear_model/plot_ols_ridge_variance.py @@ -24,8 +24,8 @@ # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import linear_model diff --git a/examples/linear_model/plot_omp.py b/examples/linear_model/plot_omp.py index 9329962cce4f6..aa6044173b8ce 100644 --- a/examples/linear_model/plot_omp.py +++ b/examples/linear_model/plot_omp.py @@ -10,9 +10,9 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.linear_model import OrthogonalMatchingPursuit -from sklearn.linear_model import OrthogonalMatchingPursuitCV + from sklearn.datasets import make_sparse_coded_signal +from sklearn.linear_model import OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV n_components, n_features = 512, 100 n_nonzero_coefs = 17 diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 46f5c23578b55..cf38ca520f076 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -41,21 +41,18 @@ # Olivier Grisel # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd - ############################################################################## # The French Motor Third-Party Liability Claims dataset # ----------------------------------------------------- # # Let's load the motor claim dataset from OpenML: # https://www.openml.org/d/41214 - from sklearn.datasets import fetch_openml - df = fetch_openml(data_id=41214, as_frame=True, parser="pandas").frame df @@ -97,11 +94,14 @@ # In order to fit linear models with those predictors it is therefore # necessary to perform standard feature transformations as follows: -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import FunctionTransformer, OneHotEncoder -from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.compose import ColumnTransformer - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + FunctionTransformer, + KBinsDiscretizer, + OneHotEncoder, + StandardScaler, +) log_scale_transformer = make_pipeline( FunctionTransformer(np.log, validate=False), StandardScaler() @@ -139,8 +139,8 @@ # the training sample. 
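A hedged sketch of such a constant-prediction baseline with sample weights; the data below are synthetic stand-ins for the claims table:

    import numpy as np
    from sklearn.dummy import DummyRegressor

    rng = np.random.RandomState(0)
    X = rng.randn(100, 3)
    y = rng.poisson(lam=0.3, size=100)   # claim counts
    w = rng.uniform(0.1, 1.0, size=100)  # exposures used as weights
    baseline = DummyRegressor(strategy="mean").fit(X, y, sample_weight=w)
    print(baseline.predict(X[:3]))  # the weighted mean of y, repeated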
from sklearn.dummy import DummyRegressor -from sklearn.pipeline import Pipeline from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline df_train, df_test = train_test_split(df, test_size=0.33, random_state=0) @@ -156,9 +156,11 @@ # Let's compute the performance of this constant prediction baseline with 3 # different regression metrics: -from sklearn.metrics import mean_squared_error -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_poisson_deviance +from sklearn.metrics import ( + mean_absolute_error, + mean_poisson_deviance, + mean_squared_error, +) def score_estimator(estimator, df_test): @@ -213,7 +215,6 @@ def score_estimator(estimator, df_test): from sklearn.linear_model import Ridge - ridge_glm = Pipeline( [ ("preprocessor", linear_model_preprocessor), @@ -285,7 +286,6 @@ def score_estimator(estimator, df_test): from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.preprocessing import OrdinalEncoder - tree_preprocessor = ColumnTransformer( [ ( diff --git a/examples/linear_model/plot_polynomial_interpolation.py b/examples/linear_model/plot_polynomial_interpolation.py index ac2fe28de870d..f648b7aea762d 100644 --- a/examples/linear_model/plot_polynomial_interpolation.py +++ b/examples/linear_model/plot_polynomial_interpolation.py @@ -42,13 +42,12 @@ # Malte Londschien # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.linear_model import Ridge -from sklearn.preprocessing import PolynomialFeatures, SplineTransformer from sklearn.pipeline import make_pipeline - +from sklearn.preprocessing import PolynomialFeatures, SplineTransformer # %% # We start by defining a function that we intend to approximate and prepare diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py index b66434fa1c0c1..715e6129cdef8 100644 --- a/examples/linear_model/plot_quantile_regression.py +++ b/examples/linear_model/plot_quantile_regression.py @@ -111,7 +111,7 @@ # # We will use the quantiles at 5% and 95% to find the outliers in the training # sample beyond the central 90% interval. -from sklearn.utils.fixes import sp_version, parse_version +from sklearn.utils.fixes import parse_version, sp_version # This line is to avoid an incompatibility with older SciPy versions. # You should use `solver="highs"` with recent versions of SciPy. @@ -253,8 +253,7 @@ # distributed target to make it more interesting as mean and median are not # equal.
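A minimal sketch of why mean and median fits diverge on a skewed target; the data are synthetic, and `solver="highs"` assumes a recent SciPy, per the note above:

    import numpy as np
    from sklearn.linear_model import LinearRegression, QuantileRegressor

    rng = np.random.RandomState(42)
    X = rng.uniform(0, 10, size=(200, 1))
    y = 2 * X.ravel() + rng.lognormal(sigma=1.0, size=200)  # right-skewed noise
    ols = LinearRegression().fit(X, y)
    med = QuantileRegressor(quantile=0.5, alpha=0, solver="highs").fit(X, y)
    # the mean of right-skewed noise exceeds its median, so the OLS line sits higher
    print(ols.intercept_, med.intercept_)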
from sklearn.linear_model import LinearRegression -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_squared_error +from sklearn.metrics import mean_absolute_error, mean_squared_error linear_regression = LinearRegression() quantile_regression = QuantileRegressor(quantile=0.5, alpha=0, solver=solver) diff --git a/examples/linear_model/plot_ransac.py b/examples/linear_model/plot_ransac.py index 0301dd0ba0088..7b89150c4bd20 100644 --- a/examples/linear_model/plot_ransac.py +++ b/examples/linear_model/plot_ransac.py @@ -18,8 +18,7 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn import linear_model, datasets - +from sklearn import datasets, linear_model n_samples = 1000 n_outliers = 50 diff --git a/examples/linear_model/plot_ridge_path.py b/examples/linear_model/plot_ridge_path.py index 66f8fd9eb6c23..01f9d45a63f8d 100644 --- a/examples/linear_model/plot_ridge_path.py +++ b/examples/linear_model/plot_ridge_path.py @@ -30,8 +30,9 @@ # Author: Fabian Pedregosa -- # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import linear_model # X is the 10x10 Hilbert matrix diff --git a/examples/linear_model/plot_robust_fit.py b/examples/linear_model/plot_robust_fit.py index c9fe49fc0d416..79213c9a8e83e 100644 --- a/examples/linear_model/plot_robust_fit.py +++ b/examples/linear_model/plot_robust_fit.py @@ -30,18 +30,18 @@ """ -from matplotlib import pyplot as plt import numpy as np +from matplotlib import pyplot as plt from sklearn.linear_model import ( + HuberRegressor, LinearRegression, - TheilSenRegressor, RANSACRegressor, - HuberRegressor, + TheilSenRegressor, ) from sklearn.metrics import mean_squared_error -from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures np.random.seed(42) diff --git a/examples/linear_model/plot_sgd_comparison.py b/examples/linear_model/plot_sgd_comparison.py index 5ab0d6b1b2827..0477e42cf5947 100644 --- a/examples/linear_model/plot_sgd_comparison.py +++ b/examples/linear_model/plot_sgd_comparison.py @@ -9,14 +9,17 @@ # Author: Rob Zinkov # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt -from sklearn import datasets +import numpy as np +from sklearn import datasets +from sklearn.linear_model import ( + LogisticRegression, + PassiveAggressiveClassifier, + Perceptron, + SGDClassifier, +) from sklearn.model_selection import train_test_split -from sklearn.linear_model import SGDClassifier, Perceptron -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import LogisticRegression heldout = [0.95, 0.90, 0.75, 0.50, 0.01] # Number of rounds to fit and evaluate an estimator. 
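The evaluation pattern implied by `heldout` looks roughly like this; the digits data and classifier settings here are placeholders:

    from sklearn.datasets import load_digits
    from sklearn.linear_model import SGDClassifier
    from sklearn.model_selection import train_test_split

    X, y = load_digits(return_X_y=True)
    for frac in [0.95, 0.75, 0.50]:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=frac, random_state=0
        )
        clf = SGDClassifier(max_iter=1000, tol=1e-3).fit(X_train, y_train)
        print(f"heldout={frac:.2f} test error={1 - clf.score(X_test, y_test):.3f}")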
diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py index 4fb884804492d..6713a74342ba2 100644 --- a/examples/linear_model/plot_sgd_early_stopping.py +++ b/examples/linear_model/plot_sgd_early_stopping.py @@ -41,19 +41,19 @@ # # License: BSD 3 clause -import time import sys +import time -import pandas as pd -import numpy as np import matplotlib.pyplot as plt +import numpy as np +import pandas as pd from sklearn import linear_model from sklearn.datasets import fetch_openml -from sklearn.model_selection import train_test_split -from sklearn.utils._testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning +from sklearn.model_selection import train_test_split from sklearn.utils import shuffle +from sklearn.utils._testing import ignore_warnings def load_mnist(n_samples=None, class_0="0", class_1="8"): diff --git a/examples/linear_model/plot_sgd_iris.py b/examples/linear_model/plot_sgd_iris.py index 64dca07396d54..5d9b923f9b444 100644 --- a/examples/linear_model/plot_sgd_iris.py +++ b/examples/linear_model/plot_sgd_iris.py @@ -9,11 +9,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets -from sklearn.linear_model import SGDClassifier from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import SGDClassifier # import some data to play with iris = datasets.load_iris() diff --git a/examples/linear_model/plot_sgd_loss_functions.py b/examples/linear_model/plot_sgd_loss_functions.py index a1f74dca4d6af..140562184b946 100644 --- a/examples/linear_model/plot_sgd_loss_functions.py +++ b/examples/linear_model/plot_sgd_loss_functions.py @@ -8,8 +8,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np def modified_huber_loss(y_true, y_pred): diff --git a/examples/linear_model/plot_sgd_penalties.py b/examples/linear_model/plot_sgd_penalties.py index 0413751fb41a9..ff71dba5f20a3 100644 --- a/examples/linear_model/plot_sgd_penalties.py +++ b/examples/linear_model/plot_sgd_penalties.py @@ -11,8 +11,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np l1_color = "navy" l2_color = "c" diff --git a/examples/linear_model/plot_sgd_separating_hyperplane.py b/examples/linear_model/plot_sgd_separating_hyperplane.py index af288fcd3dde0..e84ab7c519ae9 100644 --- a/examples/linear_model/plot_sgd_separating_hyperplane.py +++ b/examples/linear_model/plot_sgd_separating_hyperplane.py @@ -9,10 +9,11 @@ """ -import numpy as np import matplotlib.pyplot as plt -from sklearn.linear_model import SGDClassifier +import numpy as np + from sklearn.datasets import make_blobs +from sklearn.linear_model import SGDClassifier # we create 50 separable points X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) diff --git a/examples/linear_model/plot_sgd_weighted_samples.py b/examples/linear_model/plot_sgd_weighted_samples.py index 2db52042b075f..4d605e99b4e49 100644 --- a/examples/linear_model/plot_sgd_weighted_samples.py +++ b/examples/linear_model/plot_sgd_weighted_samples.py @@ -8,8 +8,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import linear_model # we create 20 points diff --git a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py index c25f4a84d91e0..2f03768f50532 100644 --- a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py +++ b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py @@ -19,13 
+19,14 @@ """ # noqa: E501 -import numpy as np -import matplotlib.pyplot as plt import matplotlib -from sklearn.svm import OneClassSVM -from sklearn.linear_model import SGDOneClassSVM +import matplotlib.pyplot as plt +import numpy as np + from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import SGDOneClassSVM from sklearn.pipeline import make_pipeline +from sklearn.svm import OneClassSVM font = {"weight": "normal", "size": 15} diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py index 507dda5c76901..f62208aab154a 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py +++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py @@ -29,9 +29,9 @@ import numpy as np from sklearn.datasets import fetch_20newsgroups_vectorized +from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split -from sklearn.exceptions import ConvergenceWarning warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn") t0 = timeit.default_timer() diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py index 37327aeaa4cb7..e6746b8fb0896 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py +++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py @@ -21,6 +21,7 @@ # License: BSD 3 clause import time + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_theilsen.py b/examples/linear_model/plot_theilsen.py index b380baf705a76..eb0ac4966841d 100644 --- a/examples/linear_model/plot_theilsen.py +++ b/examples/linear_model/plot_theilsen.py @@ -39,10 +39,11 @@ # License: BSD 3 clause import time -import numpy as np + import matplotlib.pyplot as plt -from sklearn.linear_model import LinearRegression, TheilSenRegressor -from sklearn.linear_model import RANSACRegressor +import numpy as np + +from sklearn.linear_model import LinearRegression, RANSACRegressor, TheilSenRegressor estimators = [ ("OLS", LinearRegression()), diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 1d7a5c5ed179f..2ee4b4b18fd7b 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -46,14 +46,16 @@ from functools import partial -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd from sklearn.datasets import fetch_openml -from sklearn.metrics import mean_tweedie_deviance -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_squared_error +from sklearn.metrics import ( + mean_absolute_error, + mean_squared_error, + mean_tweedie_deviance, +) def load_mtpl2(n_samples=None): @@ -209,11 +211,14 @@ def score_estimator( # containing the number of claims (``ClaimNb``), with the freMTPL2sev table, # containing the claim amount (``ClaimAmount``) for the same policy ids # (``IDpol``). 
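A hedged pandas sketch of that join; the frames below are tiny stand-ins using the column names from the description:

    import pandas as pd

    freq = pd.DataFrame({"IDpol": [1, 2], "ClaimNb": [0, 2]}).set_index("IDpol")
    sev = pd.DataFrame({"IDpol": [2, 2], "ClaimAmount": [500.0, 300.0]})
    # total claim amount per policy, aligned on IDpol; policies without claims get 0
    total = sev.groupby("IDpol")["ClaimAmount"].sum()
    df = freq.join(total, how="left").fillna({"ClaimAmount": 0.0})
    print(df)  # one row per policy: ClaimNb next to its total ClaimAmount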
-from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import FunctionTransformer, OneHotEncoder -from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.compose import ColumnTransformer - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + FunctionTransformer, + KBinsDiscretizer, + OneHotEncoder, + StandardScaler, +) df = load_mtpl2() @@ -274,9 +279,8 @@ def score_estimator( # constant rate in a given time interval (``Exposure``, in units of years). # Here we model the frequency ``y = ClaimNb / Exposure``, which is still a # (scaled) Poisson distribution, and use ``Exposure`` as `sample_weight`. -from sklearn.model_selection import train_test_split from sklearn.linear_model import PoissonRegressor - +from sklearn.model_selection import train_test_split df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) @@ -396,7 +400,6 @@ def score_estimator( # more than one claim. from sklearn.linear_model import GammaRegressor - mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 @@ -540,7 +543,6 @@ def score_estimator( # regardless of `power`. from sklearn.linear_model import TweedieRegressor - glm_pure_premium = TweedieRegressor(power=1.9, alpha=0.1, solver="newton-cholesky") glm_pure_premium.fit( X_train, df_train["PurePremium"], sample_weight=df_train["Exposure"] diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index 3773f11605241..88ce0f26b8dde 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -29,12 +29,12 @@ # We start by generating the S-curve dataset. import matplotlib.pyplot as plt -from matplotlib import ticker # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +from matplotlib import ticker -from sklearn import manifold, datasets +from sklearn import datasets, manifold n_samples = 1500 S_points, S_color = datasets.make_s_curve(n_samples, random_state=0) diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index 7d4b6610cee49..4424d700789ff 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -45,6 +45,7 @@ # scattered across it. 
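For a concrete anchor, one of the embeddings compared below reduces to a two-line call (digits data assumed):

    from sklearn.datasets import load_digits
    from sklearn.manifold import Isomap

    X, y = load_digits(return_X_y=True)
    X_2d = Isomap(n_neighbors=10, n_components=2).fit_transform(X)
    print(X_2d.shape)  # (n_samples, 2) coordinates ready to scatter-plot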
import numpy as np from matplotlib import offsetbox + from sklearn.preprocessing import MinMaxScaler @@ -103,11 +104,11 @@ def plot_embedding(X, title): from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.ensemble import RandomTreesEmbedding from sklearn.manifold import ( + MDS, + TSNE, Isomap, LocallyLinearEmbedding, - MDS, SpectralEmbedding, - TSNE, ) from sklearn.neighbors import NeighborhoodComponentsAnalysis from sklearn.pipeline import make_pipeline diff --git a/examples/manifold/plot_manifold_sphere.py b/examples/manifold/plot_manifold_sphere.py index 46db3f9f60e6d..624206ff4d5e0 100644 --- a/examples/manifold/plot_manifold_sphere.py +++ b/examples/manifold/plot_manifold_sphere.py @@ -29,14 +29,16 @@ # License: BSD 3 clause from time import time -import numpy as np + import matplotlib.pyplot as plt -from matplotlib.ticker import NullFormatter -from sklearn import manifold -from sklearn.utils import check_random_state # Unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +import numpy as np +from matplotlib.ticker import NullFormatter + +from sklearn import manifold +from sklearn.utils import check_random_state # Variables for manifold learning. n_neighbors = 10 diff --git a/examples/manifold/plot_mds.py b/examples/manifold/plot_mds.py index 51f9745a33f59..2bb56f1f4ed2a 100644 --- a/examples/manifold/plot_mds.py +++ b/examples/manifold/plot_mds.py @@ -14,13 +14,12 @@ # License: BSD import numpy as np - from matplotlib import pyplot as plt from matplotlib.collections import LineCollection from sklearn import manifold -from sklearn.metrics import euclidean_distances from sklearn.decomposition import PCA +from sklearn.metrics import euclidean_distances EPSILON = np.finfo(np.float32).eps n_samples = 20 diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py index 4a71eb83cc972..fe17d9f80030f 100644 --- a/examples/manifold/plot_swissroll.py +++ b/examples/manifold/plot_swissroll.py @@ -15,8 +15,8 @@ # We start by generating the Swiss Roll dataset. 
import matplotlib.pyplot as plt -from sklearn import manifold, datasets +from sklearn import datasets, manifold sr_points, sr_color = datasets.make_swiss_roll(n_samples=1500, random_state=0) diff --git a/examples/manifold/plot_t_sne_perplexity.py b/examples/manifold/plot_t_sne_perplexity.py index 014114a8a37d7..314458427f593 100644 --- a/examples/manifold/plot_t_sne_perplexity.py +++ b/examples/manifold/plot_t_sne_perplexity.py @@ -27,12 +27,13 @@ # Author: Narine Kokhlikyan # License: BSD -import numpy as np -import matplotlib.pyplot as plt +from time import time +import matplotlib.pyplot as plt +import numpy as np from matplotlib.ticker import NullFormatter -from sklearn import manifold, datasets -from time import time + +from sklearn import datasets, manifold n_samples = 150 n_components = 2 diff --git a/examples/miscellaneous/plot_anomaly_comparison.py b/examples/miscellaneous/plot_anomaly_comparison.py index ef274bf98fbe5..7fb6b71e2a5c6 100644 --- a/examples/miscellaneous/plot_anomaly_comparison.py +++ b/examples/miscellaneous/plot_anomaly_comparison.py @@ -68,17 +68,17 @@ import time -import numpy as np import matplotlib import matplotlib.pyplot as plt +import numpy as np from sklearn import svm -from sklearn.datasets import make_moons, make_blobs from sklearn.covariance import EllipticEnvelope +from sklearn.datasets import make_blobs, make_moons from sklearn.ensemble import IsolationForest -from sklearn.neighbors import LocalOutlierFactor -from sklearn.linear_model import SGDOneClassSVM from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import SGDOneClassSVM +from sklearn.neighbors import LocalOutlierFactor from sklearn.pipeline import make_pipeline matplotlib.rcParams["contour.negative_linestyle"] = "solid" diff --git a/examples/miscellaneous/plot_display_object_visualization.py b/examples/miscellaneous/plot_display_object_visualization.py index f108beced7a00..24095de3b5cae 100644 --- a/examples/miscellaneous/plot_display_object_visualization.py +++ b/examples/miscellaneous/plot_display_object_visualization.py @@ -24,10 +24,10 @@ # data is split into a train and test dataset and a logistic regression is # fitted with the train dataset. from sklearn.datasets import fetch_openml -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler X, y = fetch_openml(data_id=1464, return_X_y=True, parser="pandas") X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y) @@ -41,8 +41,7 @@ # With the fitted model, we compute the predictions of the model on the test # dataset. These predictions are used to compute the confusion matrix which # is plotted with the :class:`ConfusionMatrixDisplay` -from sklearn.metrics import confusion_matrix -from sklearn.metrics import ConfusionMatrixDisplay +from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix y_pred = clf.predict(X_test) cm = confusion_matrix(y_test, y_pred) @@ -56,8 +55,7 @@ # The roc curve requires either the probabilities or the non-thresholded # decision values from the estimator.
Since the logistic regression provides # a decision function, we will use it to plot the roc curve: -from sklearn.metrics import roc_curve -from sklearn.metrics import RocCurveDisplay +from sklearn.metrics import RocCurveDisplay, roc_curve y_score = clf.decision_function(X_test) @@ -69,8 +67,7 @@ ############################################################################## # Similarly, the precision recall curve can be plotted using `y_score` from # the previous sections. -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import PrecisionRecallDisplay +from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=clf.classes_[1]) pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot() diff --git a/examples/miscellaneous/plot_estimator_representation.py b/examples/miscellaneous/plot_estimator_representation.py index 304bb055e6762..1c9e3745db0de 100644 --- a/examples/miscellaneous/plot_estimator_representation.py +++ b/examples/miscellaneous/plot_estimator_representation.py @@ -7,12 +7,11 @@ displayed. """ -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OneHotEncoder, StandardScaler -from sklearn.impute import SimpleImputer from sklearn.compose import make_column_transformer +from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler # %% # Compact text representation diff --git a/examples/miscellaneous/plot_isotonic_regression.py b/examples/miscellaneous/plot_isotonic_regression.py index 0240a8dec34b5..a1c1174c9e9de 100644 --- a/examples/miscellaneous/plot_isotonic_regression.py +++ b/examples/miscellaneous/plot_isotonic_regression.py @@ -23,12 +23,12 @@ # Alexandre Gramfort # License: BSD -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.collections import LineCollection -from sklearn.linear_model import LinearRegression from sklearn.isotonic import IsotonicRegression +from sklearn.linear_model import LinearRegression from sklearn.utils import check_random_state n = 100 diff --git a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py index 6fd9d3614804c..85161a6ee51bb 100644 --- a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py +++ b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py @@ -15,13 +15,16 @@ import sys from time import time -import numpy as np + import matplotlib.pyplot as plt -from sklearn.random_projection import johnson_lindenstrauss_min_dim -from sklearn.random_projection import SparseRandomProjection -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.datasets import load_digits +import numpy as np + +from sklearn.datasets import fetch_20newsgroups_vectorized, load_digits from sklearn.metrics.pairwise import euclidean_distances +from sklearn.random_projection import ( + SparseRandomProjection, + johnson_lindenstrauss_min_dim, +) # %% # Theoretical bounds diff --git a/examples/miscellaneous/plot_kernel_approximation.py b/examples/miscellaneous/plot_kernel_approximation.py index ffb5d3940a055..7faf7900c7eb5 100644 --- a/examples/miscellaneous/plot_kernel_approximation.py +++ b/examples/miscellaneous/plot_kernel_approximation.py @@ -39,14 +39,15 @@ # License: BSD 3 clause # Standard scientific Python imports +from time import time + 
import matplotlib.pyplot as plt import numpy as np -from time import time # Import datasets, classifiers and performance metrics -from sklearn import datasets, svm, pipeline -from sklearn.kernel_approximation import RBFSampler, Nystroem +from sklearn import datasets, pipeline, svm from sklearn.decomposition import PCA +from sklearn.kernel_approximation import Nystroem, RBFSampler # The digits dataset digits = datasets.load_digits(n_class=9) diff --git a/examples/miscellaneous/plot_kernel_ridge_regression.py b/examples/miscellaneous/plot_kernel_ridge_regression.py index fa7cb15446473..6d2288936179a 100644 --- a/examples/miscellaneous/plot_kernel_ridge_regression.py +++ b/examples/miscellaneous/plot_kernel_ridge_regression.py @@ -40,9 +40,9 @@ # Construct the kernel-based regression models # -------------------------------------------- +from sklearn.kernel_ridge import KernelRidge from sklearn.model_selection import GridSearchCV from sklearn.svm import SVR -from sklearn.kernel_ridge import KernelRidge train_size = 100 diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index 81e3b6fc9a01d..b189f320cdd24 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -22,23 +22,29 @@ """ # %% -import numpy as np import warnings from pprint import pprint + +import numpy as np + from sklearn import set_config -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.base import RegressorMixin -from sklearn.base import MetaEstimatorMixin -from sklearn.base import TransformerMixin -from sklearn.base import clone +from sklearn.base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + RegressorMixin, + TransformerMixin, + clone, +) +from sklearn.linear_model import LinearRegression from sklearn.utils import metadata_routing -from sklearn.utils.metadata_routing import get_routing_for_object -from sklearn.utils.metadata_routing import MetadataRouter -from sklearn.utils.metadata_routing import MethodMapping -from sklearn.utils.metadata_routing import process_routing +from sklearn.utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + get_routing_for_object, + process_routing, +) from sklearn.utils.validation import check_is_fitted -from sklearn.linear_model import LinearRegression n_samples, n_features = 100, 4 rng = np.random.RandomState(42) diff --git a/examples/miscellaneous/plot_multilabel.py b/examples/miscellaneous/plot_multilabel.py index aded595258fea..b424c3253104a 100644 --- a/examples/miscellaneous/plot_multilabel.py +++ b/examples/miscellaneous/plot_multilabel.py @@ -32,14 +32,14 @@ # Authors: Vlad Niculae, Mathieu Blondel # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from sklearn.cross_decomposition import CCA from sklearn.datasets import make_multilabel_classification +from sklearn.decomposition import PCA from sklearn.multiclass import OneVsRestClassifier from sklearn.svm import SVC -from sklearn.decomposition import PCA -from sklearn.cross_decomposition import CCA def plot_hyperplane(clf, min_x, max_x, linestyle, label): diff --git a/examples/miscellaneous/plot_multioutput_face_completion.py b/examples/miscellaneous/plot_multioutput_face_completion.py index 31e73195747a5..62070bc05e488 100644 --- a/examples/miscellaneous/plot_multioutput_face_completion.py +++ b/examples/miscellaneous/plot_multioutput_face_completion.py @@ -12,16 +12,14 @@ """ -import numpy 
as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import fetch_olivetti_faces -from sklearn.utils.validation import check_random_state - from sklearn.ensemble import ExtraTreesRegressor +from sklearn.linear_model import LinearRegression, RidgeCV from sklearn.neighbors import KNeighborsRegressor -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import RidgeCV +from sklearn.utils.validation import check_random_state # Load the faces datasets data, targets = fetch_olivetti_faces(return_X_y=True) diff --git a/examples/miscellaneous/plot_outlier_detection_bench.py b/examples/miscellaneous/plot_outlier_detection_bench.py index 781fa515f50e8..9b530ccab0807 100644 --- a/examples/miscellaneous/plot_outlier_detection_bench.py +++ b/examples/miscellaneous/plot_outlier_detection_bench.py @@ -32,10 +32,11 @@ # The `preprocess_dataset` function returns data and target. import numpy as np -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml -from sklearn.preprocessing import LabelBinarizer import pandas as pd +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml +from sklearn.preprocessing import LabelBinarizer + rng = np.random.RandomState(42) @@ -117,8 +118,8 @@ def preprocess_dataset(dataset_name): # `compute_prediction` function returns average outlier score of X. -from sklearn.neighbors import LocalOutlierFactor from sklearn.ensemble import IsolationForest +from sklearn.neighbors import LocalOutlierFactor def compute_prediction(X, model_name): @@ -145,7 +146,9 @@ def compute_prediction(X, model_name): import math + import matplotlib.pyplot as plt + from sklearn.metrics import RocCurveDisplay datasets_name = [ diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py index 336d7c36d1661..38a984fa5b0cd 100644 --- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py +++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py @@ -13,15 +13,15 @@ """ # noqa: E501 -import pandas as pd import matplotlib.pyplot as plt +import pandas as pd + from sklearn.datasets import load_diabetes +from sklearn.inspection import PartialDependenceDisplay from sklearn.neural_network import MLPRegressor -from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler from sklearn.tree import DecisionTreeRegressor -from sklearn.inspection import PartialDependenceDisplay - # %% # Train models on the diabetes dataset diff --git a/examples/miscellaneous/plot_pipeline_display.py b/examples/miscellaneous/plot_pipeline_display.py index f0fea8d2f3a27..9642bb56b903f 100755 --- a/examples/miscellaneous/plot_pipeline_display.py +++ b/examples/miscellaneous/plot_pipeline_display.py @@ -19,10 +19,10 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. +from sklearn import set_config +from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.linear_model import LogisticRegression -from sklearn import set_config steps = [ ("preprocessing", StandardScaler()), @@ -53,9 +53,9 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. 
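A short sketch of how that representation is toggled, on a minimal two-step pipeline (a notebook context is assumed for the rendered diagram):

    from sklearn import set_config
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    pipe = Pipeline(
        [("preprocessing", StandardScaler()), ("classifier", LogisticRegression())]
    )
    set_config(display="diagram")  # display="text" switches back to the compact repr
    pipe  # in a notebook this renders the diagram; print(pipe) stays plain text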
-from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler, PolynomialFeatures from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import PolynomialFeatures, StandardScaler steps = [ ("standard_scaler", StandardScaler()), @@ -73,9 +73,9 @@ # a classifier, :class:`~sklearn.svm.SVC`, and displays its visual # representation. +from sklearn.decomposition import PCA from sklearn.pipeline import Pipeline from sklearn.svm import SVC -from sklearn.decomposition import PCA steps = [("reduce_dim", PCA(n_components=4)), ("classifier", SVC(kernel="linear"))] pipe = Pipeline(steps) @@ -90,12 +90,12 @@ # representation. import numpy as np -from sklearn.pipeline import make_pipeline -from sklearn.pipeline import Pipeline -from sklearn.impute import SimpleImputer + from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler numeric_preprocessor = Pipeline( steps=[ @@ -133,13 +133,13 @@ # representation. import numpy as np -from sklearn.pipeline import make_pipeline -from sklearn.pipeline import Pipeline -from sklearn.impute import SimpleImputer + from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder, StandardScaler from sklearn.ensemble import RandomForestClassifier +from sklearn.impute import SimpleImputer from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler numeric_preprocessor = Pipeline( steps=[ diff --git a/examples/miscellaneous/plot_roc_curve_visualization_api.py b/examples/miscellaneous/plot_roc_curve_visualization_api.py index b4e08493c77d4..7fc8df9724337 100644 --- a/examples/miscellaneous/plot_roc_curve_visualization_api.py +++ b/examples/miscellaneous/plot_roc_curve_visualization_api.py @@ -15,11 +15,12 @@ # First, we load the wine dataset and convert it to a binary classification # problem. Then, we train a support vector classifier on a training dataset. import matplotlib.pyplot as plt -from sklearn.svm import SVC + +from sklearn.datasets import load_wine from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import RocCurveDisplay -from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split +from sklearn.svm import SVC X, y = load_wine(return_X_y=True) y = y == 2 diff --git a/examples/miscellaneous/plot_set_output.py b/examples/miscellaneous/plot_set_output.py index a2088ae48adc3..725e04894614c 100644 --- a/examples/miscellaneous/plot_set_output.py +++ b/examples/miscellaneous/plot_set_output.py @@ -48,9 +48,9 @@ # %% # In a :class:`pipeline.Pipeline`, `set_output` configures all steps to output # DataFrames. -from sklearn.pipeline import make_pipeline -from sklearn.linear_model import LogisticRegression from sklearn.feature_selection import SelectPercentile +from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import make_pipeline clf = make_pipeline( StandardScaler(), SelectPercentile(percentile=75), LogisticRegression() @@ -76,10 +76,10 @@ # %% # The `set_output` API can be configured globally by using :func:`set_config` and # setting `transform_output` to `"pandas"`. 
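The same effect can also be scoped to a single estimator via `set_output`, sketched here on a toy DataFrame:

    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    X = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [10.0, 20.0, 30.0]})
    scaler = StandardScaler().set_output(transform="pandas")
    print(type(scaler.fit_transform(X)))  # a DataFrame, column names preserved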
+from sklearn import set_config
 from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
 from sklearn.impute import SimpleImputer
-from sklearn import set_config
+from sklearn.preprocessing import OneHotEncoder, StandardScaler

 set_config(transform_output="pandas")
diff --git a/examples/mixture/plot_concentration_prior.py b/examples/mixture/plot_concentration_prior.py
index a56ec6325068b..6561186adb119 100644
--- a/examples/mixture/plot_concentration_prior.py
+++ b/examples/mixture/plot_concentration_prior.py
@@ -32,10 +32,10 @@
 # Author: Thierry Guillemot
 # License: BSD 3 clause

-import numpy as np
 import matplotlib as mpl
-import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
+import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.mixture import BayesianGaussianMixture
diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py
index efc89baa8159a..82e48a8d13eb0 100644
--- a/examples/mixture/plot_gmm.py
+++ b/examples/mixture/plot_gmm.py
@@ -26,10 +26,10 @@

 import itertools

+import matplotlib as mpl
+import matplotlib.pyplot as plt
 import numpy as np
 from scipy import linalg
-import matplotlib.pyplot as plt
-import matplotlib as mpl

 from sklearn import mixture
diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py
index aa0b78ab42a0b..9466e11749966 100644
--- a/examples/mixture/plot_gmm_covariances.py
+++ b/examples/mixture/plot_gmm_covariances.py
@@ -33,7 +33,6 @@

 import matplotlib as mpl
 import matplotlib.pyplot as plt
-
 import numpy as np

 from sklearn import datasets
diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py
index 3b4beefe8c99a..aa0266c98ff7a 100644
--- a/examples/mixture/plot_gmm_init.py
+++ b/examples/mixture/plot_gmm_init.py
@@ -37,12 +37,14 @@
 # Author: Gordon Walsh
 # Data generation code from Jake Vanderplas

+from timeit import default_timer as timer
+
 import matplotlib.pyplot as plt
 import numpy as np
+
+from sklearn.datasets._samples_generator import make_blobs
 from sklearn.mixture import GaussianMixture
 from sklearn.utils.extmath import row_norms
-from sklearn.datasets._samples_generator import make_blobs
-from timeit import default_timer as timer

 print(__doc__)
diff --git a/examples/mixture/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py
index 70d58f22f8f41..062bdfd4d6d67 100644
--- a/examples/mixture/plot_gmm_pdf.py
+++ b/examples/mixture/plot_gmm_pdf.py
@@ -9,9 +9,10 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.colors import LogNorm
+
 from sklearn import mixture

 n_samples = 300
diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py
index c8656a69fe9fb..34af17b8920bc 100644
--- a/examples/mixture/plot_gmm_sin.py
+++ b/examples/mixture/plot_gmm_sin.py
@@ -41,10 +41,10 @@

 import itertools

+import matplotlib as mpl
+import matplotlib.pyplot as plt
 import numpy as np
 from scipy import linalg
-import matplotlib.pyplot as plt
-import matplotlib as mpl

 from sklearn import mixture
diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py
index b891564db4025..278083a994e58 100644
--- a/examples/model_selection/plot_confusion_matrix.py
+++ b/examples/model_selection/plot_confusion_matrix.py
@@ -24,12 +24,12 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

-from sklearn import svm, datasets
-from sklearn.model_selection import train_test_split
+from sklearn import datasets, svm
 from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.model_selection import train_test_split

 # import some data to play with
 iris = datasets.load_iris()
diff --git a/examples/model_selection/plot_cv_indices.py b/examples/model_selection/plot_cv_indices.py
index 8b70191e4abd1..e6c3580c787f0 100644
--- a/examples/model_selection/plot_cv_indices.py
+++ b/examples/model_selection/plot_cv_indices.py
@@ -12,19 +12,20 @@

 """

+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.patches import Patch
+
 from sklearn.model_selection import (
-    TimeSeriesSplit,
+    GroupKFold,
+    GroupShuffleSplit,
     KFold,
     ShuffleSplit,
+    StratifiedGroupKFold,
     StratifiedKFold,
-    GroupShuffleSplit,
-    GroupKFold,
     StratifiedShuffleSplit,
-    StratifiedGroupKFold,
+    TimeSeriesSplit,
 )
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib.patches import Patch

 rng = np.random.RandomState(1338)
 cmap_data = plt.cm.Paired
diff --git a/examples/model_selection/plot_cv_predict.py b/examples/model_selection/plot_cv_predict.py
index 7fd843c535c85..65517d85f3fd1 100644
--- a/examples/model_selection/plot_cv_predict.py
+++ b/examples/model_selection/plot_cv_predict.py
@@ -37,6 +37,7 @@
 # residuals (i.e. the difference between the observed values and the predicted
 # values) vs. the predicted values.
 import matplotlib.pyplot as plt
+
 from sklearn.metrics import PredictionErrorDisplay

 fig, axs = plt.subplots(ncols=2, figsize=(8, 4))
diff --git a/examples/model_selection/plot_det.py b/examples/model_selection/plot_det.py
index 97dbe771e6407..7f7a5be32f976 100644
--- a/examples/model_selection/plot_det.py
+++ b/examples/model_selection/plot_det.py
@@ -82,6 +82,7 @@
 # :func:`scipy.stats.norm`.

 import matplotlib.pyplot as plt
+
 from sklearn.metrics import DetCurveDisplay, RocCurveDisplay

 fig, [ax_roc, ax_det] = plt.subplots(1, 2, figsize=(11, 5))
diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py
index 7a7dd8ea3e463..a8dab986a48d2 100644
--- a/examples/model_selection/plot_grid_search_refit_callable.py
+++ b/examples/model_selection/plot_grid_search_refit_callable.py
@@ -20,8 +20,8 @@

 # Author: Wenhao Zhang

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.datasets import load_digits
 from sklearn.decomposition import PCA
diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py
index 179d860b42128..fbeb485d8db44 100644
--- a/examples/model_selection/plot_grid_search_stats.py
+++ b/examples/model_selection/plot_grid_search_stats.py
@@ -16,6 +16,7 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
+
 from sklearn.datasets import make_moons

 X, y = make_moons(noise=0.352, random_state=1, n_samples=100)
diff --git a/examples/model_selection/plot_grid_search_text_feature_extraction.py b/examples/model_selection/plot_grid_search_text_feature_extraction.py
index 9ad4296aad9b4..17c2e2bfd5d99 100644
--- a/examples/model_selection/plot_grid_search_text_feature_extraction.py
+++ b/examples/model_selection/plot_grid_search_text_feature_extraction.py
@@ -105,6 +105,7 @@
 # via the parameter `n_jobs`.
 from pprint import pprint
+
 from sklearn.model_selection import RandomizedSearchCV

 random_search = RandomizedSearchCV(
diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py
index 956c70aaabd82..450392679095f 100644
--- a/examples/model_selection/plot_learning_curve.py
+++ b/examples/model_selection/plot_learning_curve.py
@@ -38,6 +38,7 @@
 # a cross-validation procedure.
 import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn.model_selection import LearningCurveDisplay, ShuffleSplit

 fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6), sharey=True)
diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py
index e6ec94fc50cf9..9a3f29def9e98 100644
--- a/examples/model_selection/plot_likelihood_ratios.py
+++ b/examples/model_selection/plot_likelihood_ratios.py
@@ -55,8 +55,8 @@ class proportion than the target application.
 # ratio to evaluate the usefulness of this classifier as a disease diagnosis
 # tool:

-from sklearn.metrics import class_likelihood_ratios
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import class_likelihood_ratios

 estimator = LogisticRegression().fit(X_train, y_train)
 y_pred = estimator.predict(X_test)
@@ -166,10 +166,12 @@ def extract_score(cv_results):
 # label `1` corresponds to the positive class "disease", whereas the label `0`
 # stands for "no-disease".

-import numpy as np
+from collections import defaultdict
+
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.inspection import DecisionBoundaryDisplay
-from collections import defaultdict

 populations = defaultdict(list)
 common_params = {
diff --git a/examples/model_selection/plot_multi_metric_evaluation.py b/examples/model_selection/plot_multi_metric_evaluation.py
index e47e67e086ccb..674bf8bc1b07c 100644
--- a/examples/model_selection/plot_multi_metric_evaluation.py
+++ b/examples/model_selection/plot_multi_metric_evaluation.py
@@ -23,9 +23,8 @@
 from matplotlib import pyplot as plt

 from sklearn.datasets import make_hastie_10_2
+from sklearn.metrics import accuracy_score, make_scorer
 from sklearn.model_selection import GridSearchCV
-from sklearn.metrics import make_scorer
-from sklearn.metrics import accuracy_score
 from sklearn.tree import DecisionTreeClassifier

 # %%
diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py
index b6f45255e8a09..7513a078b68ce 100644
--- a/examples/model_selection/plot_nested_cross_validation_iris.py
+++ b/examples/model_selection/plot_nested_cross_validation_iris.py
@@ -44,11 +44,12 @@

 """

-from sklearn.datasets import load_iris
+import numpy as np
 from matplotlib import pyplot as plt
+
+from sklearn.datasets import load_iris
+from sklearn.model_selection import GridSearchCV, KFold, cross_val_score
 from sklearn.svm import SVC
-from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
-import numpy as np

 # Number of random trials
 NUM_TRIALS = 30
diff --git a/examples/model_selection/plot_permutation_tests_for_classification.py b/examples/model_selection/plot_permutation_tests_for_classification.py
index c9fcaebb549fe..a02f6d188f006 100644
--- a/examples/model_selection/plot_permutation_tests_for_classification.py
+++ b/examples/model_selection/plot_permutation_tests_for_classification.py
@@ -58,9 +58,8 @@
 # the percentage of permutations for which the score obtained is greater
 # that the score obtained using the original data.

+from sklearn.model_selection import StratifiedKFold, permutation_test_score
 from sklearn.svm import SVC
-from sklearn.model_selection import StratifiedKFold
-from sklearn.model_selection import permutation_test_score

 clf = SVC(kernel="linear", random_state=7)
 cv = StratifiedKFold(2, shuffle=True, random_state=0)
diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py
index d11d6e10cdff6..52d85691af439 100644
--- a/examples/model_selection/plot_precision_recall.py
+++ b/examples/model_selection/plot_precision_recall.py
@@ -100,6 +100,7 @@
 #
 # We will use a Linear SVC classifier to differentiate two types of irises.
 import numpy as np
+
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split

@@ -198,8 +199,7 @@
 # %%
 # The average precision score in multi-label settings
 # ...................................................
-from sklearn.metrics import precision_recall_curve
-from sklearn.metrics import average_precision_score
+from sklearn.metrics import average_precision_score, precision_recall_curve

 # For each class
 precision = dict()
@@ -232,9 +232,10 @@
 # %%
 # Plot Precision-Recall curve for each class and iso-f1 curves
 # ............................................................
-import matplotlib.pyplot as plt
 from itertools import cycle

+import matplotlib.pyplot as plt
+
 # setup plot details
 colors = cycle(["navy", "turquoise", "darkorange", "cornflowerblue", "teal"])
diff --git a/examples/model_selection/plot_randomized_search.py b/examples/model_selection/plot_randomized_search.py
index 9ffc26a5abc84..140b359ff1934 100644
--- a/examples/model_selection/plot_randomized_search.py
+++ b/examples/model_selection/plot_randomized_search.py
@@ -20,14 +20,14 @@

 """

-import numpy as np
-
 from time import time
+
+import numpy as np
 import scipy.stats as stats

-from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
 from sklearn.datasets import load_digits
 from sklearn.linear_model import SGDClassifier
+from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

 # get some data
 X, y = load_digits(return_X_y=True, n_class=3)
diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py
index 34346780def26..3fa1374f1b8a0 100644
--- a/examples/model_selection/plot_roc.py
+++ b/examples/model_selection/plot_roc.py
@@ -44,6 +44,7 @@
 # Here we binarize the output and add noisy features to make the problem harder.

 import numpy as np
+
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split

@@ -118,6 +119,7 @@
 # %%
 import matplotlib.pyplot as plt
+
 from sklearn.metrics import RocCurveDisplay

 RocCurveDisplay.from_predictions(
@@ -191,7 +193,7 @@
 # :class:`~sklearn.metrics.roc_curve` and then the area under the curve with
 # :class:`~sklearn.metrics.auc` for the raveled true and predicted classes.

-from sklearn.metrics import roc_curve, auc
+from sklearn.metrics import auc, roc_curve

 # store the fpr, tpr, and roc_auc for all averaging strategies
 fpr, tpr, roc_auc = dict(), dict(), dict()
diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py
index cf4c0496f54fb..a3663aa040b56 100644
--- a/examples/model_selection/plot_roc_crossval.py
+++ b/examples/model_selection/plot_roc_crossval.py
@@ -41,6 +41,7 @@
 # (`class_id=0`).
 import numpy as np
+
 from sklearn.datasets import load_iris

 iris = load_iris()
@@ -66,8 +67,7 @@
 import matplotlib.pyplot as plt

 from sklearn import svm
-from sklearn.metrics import auc
-from sklearn.metrics import RocCurveDisplay
+from sklearn.metrics import RocCurveDisplay, auc
 from sklearn.model_selection import StratifiedKFold

 n_splits = 6
diff --git a/examples/model_selection/plot_successive_halving_heatmap.py b/examples/model_selection/plot_successive_halving_heatmap.py
index ecdae48e64011..9b079e4b1351f 100644
--- a/examples/model_selection/plot_successive_halving_heatmap.py
+++ b/examples/model_selection/plot_successive_halving_heatmap.py
@@ -14,12 +14,10 @@
 import numpy as np
 import pandas as pd

-from sklearn.svm import SVC
 from sklearn import datasets
-from sklearn.model_selection import GridSearchCV
 from sklearn.experimental import enable_halving_search_cv  # noqa
-from sklearn.model_selection import HalvingGridSearchCV
-
+from sklearn.model_selection import GridSearchCV, HalvingGridSearchCV
+from sklearn.svm import SVC

 # %%
 # We first define the parameter space for an :class:`~sklearn.svm.SVC`
diff --git a/examples/model_selection/plot_successive_halving_iterations.py b/examples/model_selection/plot_successive_halving_iterations.py
index bd2d5635e376e..31805d308e269 100644
--- a/examples/model_selection/plot_successive_halving_iterations.py
+++ b/examples/model_selection/plot_successive_halving_iterations.py
@@ -10,16 +10,15 @@

 """

-import pandas as pd
-from sklearn import datasets
 import matplotlib.pyplot as plt
-from scipy.stats import randint
 import numpy as np
+import pandas as pd
+from scipy.stats import randint

+from sklearn import datasets
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.experimental import enable_halving_search_cv  # noqa
 from sklearn.model_selection import HalvingRandomSearchCV
-from sklearn.ensemble import RandomForestClassifier
-

 # %%
 # We first define the parameter space and train a
diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py
index 1aba6f4892cbe..af7e7d14cdac0 100644
--- a/examples/model_selection/plot_train_error_vs_test_error.py
+++ b/examples/model_selection/plot_train_error_vs_test_error.py
@@ -19,6 +19,7 @@
 # Generate sample data
 # --------------------
 import numpy as np
+
 from sklearn import linear_model
 from sklearn.datasets import make_regression
 from sklearn.model_selection import train_test_split
diff --git a/examples/model_selection/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py
index ae8450b50cea9..412946fc9ca8b 100644
--- a/examples/model_selection/plot_underfitting_overfitting.py
+++ b/examples/model_selection/plot_underfitting_overfitting.py
@@ -21,12 +21,13 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import PolynomialFeatures
+import numpy as np
+
 from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import cross_val_score
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import PolynomialFeatures


 def true_fun(X):
diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py
index 48aa19dfbc556..947d8ac2b2fdb 100644
--- a/examples/model_selection/plot_validation_curve.py
+++ b/examples/model_selection/plot_validation_curve.py
@@ -17,8 +17,8 @@
 import numpy as np

 from sklearn.datasets import load_digits
-from sklearn.svm import SVC
 from sklearn.model_selection import ValidationCurveDisplay
+from sklearn.svm import SVC

 X, y = load_digits(return_X_y=True)
 subset_mask = np.isin(y, [1, 2])  # binary classification: 1 vs 2
diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py
index e1f9feed43a97..1df4ee3b8346b 100644
--- a/examples/multioutput/plot_classifier_chain_yeast.py
+++ b/examples/multioutput/plot_classifier_chain_yeast.py
@@ -36,14 +36,15 @@
 # Author: Adam Kleczewski
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import fetch_openml
-from sklearn.multioutput import ClassifierChain
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import jaccard_score
 from sklearn.model_selection import train_test_split
 from sklearn.multiclass import OneVsRestClassifier
-from sklearn.metrics import jaccard_score
-from sklearn.linear_model import LogisticRegression
+from sklearn.multioutput import ClassifierChain

 # Load a multi-label dataset from https://www.openml.org/d/40597
 X, Y = fetch_openml("yeast", version=4, return_X_y=True, parser="pandas")
diff --git a/examples/neighbors/approximate_nearest_neighbors.py b/examples/neighbors/approximate_nearest_neighbors.py
index ee848cdc66428..faff31d7a85c9 100644
--- a/examples/neighbors/approximate_nearest_neighbors.py
+++ b/examples/neighbors/approximate_nearest_neighbors.py
@@ -40,6 +40,7 @@
 import joblib
 import numpy as np
 from scipy.sparse import csr_matrix
+
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.datasets import fetch_openml
 from sklearn.utils import shuffle
diff --git a/examples/neighbors/plot_caching_nearest_neighbors.py b/examples/neighbors/plot_caching_nearest_neighbors.py
index 00be6470c1591..10c0d315da7af 100644
--- a/examples/neighbors/plot_caching_nearest_neighbors.py
+++ b/examples/neighbors/plot_caching_nearest_neighbors.py
@@ -22,11 +22,12 @@
 #
 # License: BSD 3 clause
 from tempfile import TemporaryDirectory
+
 import matplotlib.pyplot as plt

-from sklearn.neighbors import KNeighborsTransformer, KNeighborsClassifier
-from sklearn.model_selection import GridSearchCV
 from sklearn.datasets import load_digits
+from sklearn.model_selection import GridSearchCV
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsTransformer
 from sklearn.pipeline import Pipeline

 X, y = load_digits(return_X_y=True)
diff --git a/examples/neighbors/plot_classification.py b/examples/neighbors/plot_classification.py
index cc4f0864ba926..4ed23862ae455 100644
--- a/examples/neighbors/plot_classification.py
+++ b/examples/neighbors/plot_classification.py
@@ -11,7 +11,8 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 from matplotlib.colors import ListedColormap
-from sklearn import neighbors, datasets
+
+from sklearn import datasets, neighbors
 from sklearn.inspection import DecisionBoundaryDisplay

 n_neighbors = 15
diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py
index e580f9fa178bc..045058eab09cc 100644
--- a/examples/neighbors/plot_digits_kde_sampling.py
+++ b/examples/neighbors/plot_digits_kde_sampling.py
@@ -11,13 +11,13 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.datasets import load_digits
-from sklearn.neighbors import KernelDensity
 from sklearn.decomposition import PCA
 from sklearn.model_selection import GridSearchCV
+from sklearn.neighbors import KernelDensity

 # load the data
 digits = load_digits()
diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py
index 8b139d4cc2335..fc5b1914f23de 100644
--- a/examples/neighbors/plot_kde_1d.py
+++ b/examples/neighbors/plot_kde_1d.py
@@ -30,9 +30,10 @@
 # Author: Jake Vanderplas
 #

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from scipy.stats import norm
+
 from sklearn.neighbors import KernelDensity

 # ----------------------------------------------------------------------
diff --git a/examples/neighbors/plot_lof_novelty_detection.py b/examples/neighbors/plot_lof_novelty_detection.py
index 277134cc77673..91e40661c6dfe 100644
--- a/examples/neighbors/plot_lof_novelty_detection.py
+++ b/examples/neighbors/plot_lof_novelty_detection.py
@@ -25,9 +25,10 @@

 """

-import numpy as np
 import matplotlib
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.neighbors import LocalOutlierFactor

 np.random.seed(42)
diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py
index a08bbe8be3756..f76770640ed03 100644
--- a/examples/neighbors/plot_nca_classification.py
+++ b/examples/neighbors/plot_nca_classification.py
@@ -19,13 +19,13 @@

 import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
+
 from sklearn import datasets
+from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
 from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
 from sklearn.pipeline import Pipeline
-from sklearn.inspection import DecisionBoundaryDisplay
-
+from sklearn.preprocessing import StandardScaler

 n_neighbors = 1
diff --git a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py
index d245e0223ccfa..82fd35616929e 100644
--- a/examples/neighbors/plot_nca_dim_reduction.py
+++ b/examples/neighbors/plot_nca_dim_reduction.py
@@ -30,12 +30,13 @@
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets
-from sklearn.model_selection import train_test_split
 from sklearn.decomposition import PCA
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py
index d722ffa5be033..e5fd2f9cb67bd 100644
--- a/examples/neighbors/plot_nca_illustration.py
+++ b/examples/neighbors/plot_nca_illustration.py
@@ -12,13 +12,14 @@
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.datasets import make_classification
-from sklearn.neighbors import NeighborhoodComponentsAnalysis
+import numpy as np
 from matplotlib import cm
 from scipy.special import logsumexp

+from sklearn.datasets import make_classification
+from sklearn.neighbors import NeighborhoodComponentsAnalysis
+
 # %%
 # Original points
 # ---------------
diff --git a/examples/neighbors/plot_nearest_centroid.py b/examples/neighbors/plot_nearest_centroid.py
index 4eb0e0388a30b..c8f710d0a0377 100644
--- a/examples/neighbors/plot_nearest_centroid.py
+++ b/examples/neighbors/plot_nearest_centroid.py
@@ -8,13 +8,13 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.colors import ListedColormap
+
 from sklearn import datasets
-from sklearn.neighbors import NearestCentroid
 from sklearn.inspection import DecisionBoundaryDisplay
-
+from sklearn.neighbors import NearestCentroid

 # import some data to play with
 iris = datasets.load_iris()
diff --git a/examples/neighbors/plot_regression.py b/examples/neighbors/plot_regression.py
index 78b850d1a4e2c..d5ceba8a34860 100644
--- a/examples/neighbors/plot_regression.py
+++ b/examples/neighbors/plot_regression.py
@@ -18,8 +18,9 @@
 # %%
 # Generate sample data
 # --------------------
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import neighbors

 np.random.seed(0)
diff --git a/examples/neighbors/plot_species_kde.py b/examples/neighbors/plot_species_kde.py
index 35ea40158a45c..3783138dfcb76 100644
--- a/examples/neighbors/plot_species_kde.py
+++ b/examples/neighbors/plot_species_kde.py
@@ -40,8 +40,9 @@
 #
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import fetch_species_distributions
 from sklearn.neighbors import KernelDensity
diff --git a/examples/neural_networks/plot_mlp_alpha.py b/examples/neural_networks/plot_mlp_alpha.py
index 443d41f4707bf..b53beef54c115 100644
--- a/examples/neural_networks/plot_mlp_alpha.py
+++ b/examples/neural_networks/plot_mlp_alpha.py
@@ -23,11 +23,12 @@
 import numpy as np
 from matplotlib import pyplot as plt
 from matplotlib.colors import ListedColormap
+
+from sklearn.datasets import make_circles, make_classification, make_moons
 from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from sklearn.datasets import make_moons, make_circles, make_classification
 from sklearn.neural_network import MLPClassifier
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler

 h = 0.02  # step size in the mesh
diff --git a/examples/neural_networks/plot_mlp_training_curves.py b/examples/neural_networks/plot_mlp_training_curves.py
index 3fbddda879162..a9f03c2599a8e 100644
--- a/examples/neural_networks/plot_mlp_training_curves.py
+++ b/examples/neural_networks/plot_mlp_training_curves.py
@@ -18,10 +18,10 @@

 import matplotlib.pyplot as plt

-from sklearn.neural_network import MLPClassifier
-from sklearn.preprocessing import MinMaxScaler
 from sklearn import datasets
 from sklearn.exceptions import ConvergenceWarning
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import MinMaxScaler

 # different learning rate schedules and momentum parameters
 params = [
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index 03f615786e830..43e6a171fb696 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -25,11 +25,13 @@

 """
 import warnings
+
 import matplotlib.pyplot as plt
+
 from sklearn.datasets import fetch_openml
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.neural_network import MLPClassifier
 from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPClassifier

 # Load data from https://www.openml.org/d/554
 X, y = fetch_openml(
diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py
index de939922d9514..3ba878d4ad191 100644
--- a/examples/neural_networks/plot_rbm_logistic_classification.py
+++ b/examples/neural_networks/plot_rbm_logistic_classification.py
@@ -23,13 +23,11 @@
 # linear shifts of 1 pixel in each direction.

 import numpy as np
-
 from scipy.ndimage import convolve

 from sklearn import datasets
-from sklearn.preprocessing import minmax_scale
-
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import minmax_scale


 def nudge_dataset(X, Y):
diff --git a/examples/preprocessing/plot_all_scaling.py b/examples/preprocessing/plot_all_scaling.py
index 2893f5cf01ccb..c53c81a89727a 100644
--- a/examples/preprocessing/plot_all_scaling.py
+++ b/examples/preprocessing/plot_all_scaling.py
@@ -45,22 +45,22 @@
 # Thomas Unterthiner
 # License: BSD 3 clause

-import numpy as np
-
 import matplotlib as mpl
-from matplotlib import pyplot as plt
+import numpy as np
 from matplotlib import cm
-
-from sklearn.preprocessing import MinMaxScaler
-from sklearn.preprocessing import minmax_scale
-from sklearn.preprocessing import MaxAbsScaler
-from sklearn.preprocessing import StandardScaler
-from sklearn.preprocessing import RobustScaler
-from sklearn.preprocessing import Normalizer
-from sklearn.preprocessing import QuantileTransformer
-from sklearn.preprocessing import PowerTransformer
+from matplotlib import pyplot as plt

 from sklearn.datasets import fetch_california_housing
+from sklearn.preprocessing import (
+    MaxAbsScaler,
+    MinMaxScaler,
+    Normalizer,
+    PowerTransformer,
+    QuantileTransformer,
+    RobustScaler,
+    StandardScaler,
+    minmax_scale,
+)

 dataset = fetch_california_housing()
 X_full, y_full = dataset.data, dataset.target
diff --git a/examples/preprocessing/plot_discretization.py b/examples/preprocessing/plot_discretization.py
index ffb3f9403634d..002d606da0c9d 100644
--- a/examples/preprocessing/plot_discretization.py
+++ b/examples/preprocessing/plot_discretization.py
@@ -31,8 +31,8 @@
 # Hanmin Qin
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.linear_model import LinearRegression
 from sklearn.preprocessing import KBinsDiscretizer
diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index a35c56ea683d6..71adf44474aa3 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -33,20 +33,19 @@
 #
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.colors import ListedColormap
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from sklearn.datasets import make_moons, make_circles, make_classification
+
+from sklearn.datasets import make_circles, make_classification, make_moons
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import KBinsDiscretizer
+from sklearn.preprocessing import KBinsDiscretizer, StandardScaler
 from sklearn.svm import SVC, LinearSVC
-from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.utils._testing import ignore_warnings
-from sklearn.exceptions import ConvergenceWarning

 h = 0.02  # step size in the mesh
diff --git a/examples/preprocessing/plot_discretization_strategies.py b/examples/preprocessing/plot_discretization_strategies.py
index 91904246540dd..b4c2f3ca1858d 100644
--- a/examples/preprocessing/plot_discretization_strategies.py
+++ b/examples/preprocessing/plot_discretization_strategies.py
@@ -19,11 +19,11 @@
 # Author: Tom Dupré la Tour
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

-from sklearn.preprocessing import KBinsDiscretizer
 from sklearn.datasets import make_blobs
+from sklearn.preprocessing import KBinsDiscretizer

 strategies = ["uniform", "quantile", "kmeans"]
diff --git a/examples/preprocessing/plot_map_data_to_normal.py b/examples/preprocessing/plot_map_data_to_normal.py
index 42a61d84fa384..a521039098871 100644
--- a/examples/preprocessing/plot_map_data_to_normal.py
+++ b/examples/preprocessing/plot_map_data_to_normal.py
@@ -38,13 +38,11 @@
 # Nicolas Hug
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

-from sklearn.preprocessing import PowerTransformer
-from sklearn.preprocessing import QuantileTransformer
 from sklearn.model_selection import train_test_split
-
+from sklearn.preprocessing import PowerTransformer, QuantileTransformer

 N_SAMPLES = 1000
 FONT_SIZE = 6
diff --git a/examples/preprocessing/plot_scaling_importance.py b/examples/preprocessing/plot_scaling_importance.py
index 4e8f87b68b1d4..6e0ae0ae1c109 100644
--- a/examples/preprocessing/plot_scaling_importance.py
+++ b/examples/preprocessing/plot_scaling_importance.py
@@ -65,10 +65,10 @@
 # of features.

 import matplotlib.pyplot as plt
+
 from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.neighbors import KNeighborsClassifier
-

 X_plot = X[["proline", "hue"]]
 X_plot_scaled = scaler.fit_transform(X_plot)
 clf = KNeighborsClassifier(n_neighbors=20)
@@ -122,6 +122,7 @@ def fit_and_plot_model(X_plot, y, clf, ax):
 # We can inspect the first principal components using all the original features:

 import pandas as pd
+
 from sklearn.decomposition import PCA

 pca = PCA(n_components=2).fit(X_train)
@@ -199,8 +200,9 @@ def fit_and_plot_model(X_plot, y, clf, ax):
 # non-scaling of the data:

 import numpy as np
-from sklearn.pipeline import make_pipeline
+
 from sklearn.linear_model import LogisticRegressionCV
+from sklearn.pipeline import make_pipeline

 Cs = np.logspace(-5, 5, 20)

@@ -218,8 +220,7 @@ def fit_and_plot_model(X_plot, y, clf, ax):
 # was not scaled before applying PCA. We now evaluate the effect of scaling on
 # the accuracy and the mean log-loss of the optimal models:

-from sklearn.metrics import accuracy_score
-from sklearn.metrics import log_loss
+from sklearn.metrics import accuracy_score, log_loss

 y_pred = unscaled_clf.predict(X_test)
 y_pred_scaled = scaled_clf.predict(X_test)
diff --git a/examples/preprocessing/plot_target_encoder.py b/examples/preprocessing/plot_target_encoder.py
index a50f0199e5ba8..d35990cfb2a9f 100644
--- a/examples/preprocessing/plot_target_encoder.py
+++ b/examples/preprocessing/plot_target_encoder.py
@@ -55,9 +55,7 @@
 # strategies. First, we list out the encoders we will be using to preprocess
 # the categorical features:
 from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import OrdinalEncoder
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.preprocessing import TargetEncoder
+from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, TargetEncoder

 categorical_preprocessors = [
     ("drop", "drop"),
@@ -71,9 +69,9 @@
 # %%
 # Next, we evaluate the models using cross validation and record the results:
-from sklearn.pipeline import make_pipeline
-from sklearn.model_selection import cross_validate
 from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.model_selection import cross_validate
+from sklearn.pipeline import make_pipeline

 n_cv_folds = 3
 max_iter = 20
diff --git a/examples/preprocessing/plot_target_encoder_cross_val.py b/examples/preprocessing/plot_target_encoder_cross_val.py
index 455625cc47460..a7066eeab29cb 100644
--- a/examples/preprocessing/plot_target_encoder_cross_val.py
+++ b/examples/preprocessing/plot_target_encoder_cross_val.py
@@ -21,9 +21,10 @@
 # feature with medium cardinality, an uninformative feature with medium cardinality,
 # and an uninformative feature with high cardinality. First, we generate the informative
 # feature:
-from sklearn.preprocessing import KBinsDiscretizer
 import numpy as np

+from sklearn.preprocessing import KBinsDiscretizer
+
 n_samples = 50_000

 rng = np.random.RandomState(42)
@@ -60,9 +61,10 @@
 # %%
 # Finally, we assemble the dataset and perform a train test split:
-from sklearn.model_selection import train_test_split
 import pandas as pd

+from sklearn.model_selection import train_test_split
+
 X = pd.DataFrame(
     np.concatenate(
         [X_informative, X_shuffled, X_near_unique_categories],
@@ -80,8 +82,8 @@
 # interval cross validation. First, we see the Ridge model trained on the
 # raw features will have low performance, because the order of the informative
 # feature is not informative:
-from sklearn.linear_model import Ridge
 import sklearn
+from sklearn.linear_model import Ridge

 # Configure transformers to always output DataFrames
 sklearn.set_config(transform_output="pandas")
@@ -107,8 +109,8 @@
 # %%
 # The coefficients of the linear model shows that most of the weight is on the
 # feature at column index 0, which is the informative feature
-import pandas as pd
 import matplotlib.pyplot as plt
+import pandas as pd

 plt.rcParams["figure.constrained_layout.use"] = True
diff --git a/examples/release_highlights/plot_release_highlights_0_22_0.py b/examples/release_highlights/plot_release_highlights_0_22_0.py
index 02b99df3491ee..ca09013703592 100644
--- a/examples/release_highlights/plot_release_highlights_0_22_0.py
+++ b/examples/release_highlights/plot_release_highlights_0_22_0.py
@@ -34,15 +34,15 @@
 # :class:`~metrics.plot_confusion_matrix`. Read more about this new API in the
 # :ref:`User Guide `.

-from sklearn.model_selection import train_test_split
-from sklearn.svm import SVC
+import matplotlib.pyplot as plt
+
+from sklearn.datasets import make_classification
+from sklearn.ensemble import RandomForestClassifier

 # from sklearn.metrics import plot_roc_curve
 from sklearn.metrics import RocCurveDisplay
-
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.datasets import make_classification
-import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC

 X, y = make_classification(random_state=0)
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
@@ -79,12 +79,12 @@
 # Read more in the :ref:`User Guide `.

 from sklearn.datasets import load_iris
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import LogisticRegression
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import make_pipeline
 from sklearn.ensemble import StackingClassifier
+from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import LinearSVC

 X, y = load_iris(return_X_y=True)
 estimators = [
@@ -102,8 +102,9 @@
 # The :func:`inspection.permutation_importance` can be used to get an
 # estimate of the importance of each feature, for any fitted estimator:

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.inspection import permutation_importance
@@ -155,8 +156,9 @@
 # See more details in the :ref:`User Guide `.

 from tempfile import TemporaryDirectory
-from sklearn.neighbors import KNeighborsTransformer
+
 from sklearn.manifold import Isomap
+from sklearn.neighbors import KNeighborsTransformer
 from sklearn.pipeline import make_pipeline

 X, y = make_classification(random_state=0)
@@ -272,8 +274,8 @@ def test_sklearn_compatible_estimator(estimator, check):

 from sklearn.datasets import make_classification
-from sklearn.svm import SVC
 from sklearn.metrics import roc_auc_score
+from sklearn.svm import SVC

 X, y = make_classification(n_classes=4, n_informative=16)
 clf = SVC(decision_function_shape="ovo", probability=True).fit(X, y)
diff --git a/examples/semi_supervised/plot_label_propagation_digits.py b/examples/semi_supervised/plot_label_propagation_digits.py
index f848e3b76e084..bfdff8e362e47 100644
--- a/examples/semi_supervised/plot_label_propagation_digits.py
+++ b/examples/semi_supervised/plot_label_propagation_digits.py
@@ -24,9 +24,10 @@ class will be very good.
 # ---------------
 #
 # We use the digits dataset. We only use a subset of randomly selected samples.
-from sklearn import datasets
 import numpy as np

+from sklearn import datasets
+
 digits = datasets.load_digits()
 rng = np.random.RandomState(2)
 indices = np.arange(len(digits.data))
@@ -59,8 +60,8 @@ class will be very good.
 #
 # We fit a :class:`~sklearn.semi_supervised.LabelSpreading` and use it to predict
 # the unknown labels.

-from sklearn.semi_supervised import LabelSpreading
 from sklearn.metrics import classification_report
+from sklearn.semi_supervised import LabelSpreading

 lp_model = LabelSpreading(gamma=0.25, max_iter=20)
 lp_model.fit(X, y_train)
diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
index 3a1f533c8a281..215655a287c2d 100644
--- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
+++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
@@ -23,13 +23,13 @@
 # Authors: Clay Woolam
 # License: BSD

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from scipy import stats

 from sklearn import datasets
-from sklearn.semi_supervised import LabelSpreading
 from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.semi_supervised import LabelSpreading

 digits = datasets.load_digits()
 rng = np.random.RandomState(0)
diff --git a/examples/semi_supervised/plot_label_propagation_structure.py b/examples/semi_supervised/plot_label_propagation_structure.py
index 5de6e9f20a7e3..cfcd1c1bf5a54 100644
--- a/examples/semi_supervised/plot_label_propagation_structure.py
+++ b/examples/semi_supervised/plot_label_propagation_structure.py
@@ -22,6 +22,7 @@
 # Here, all labels but two are tagged as unknown.

 import numpy as np
+
 from sklearn.datasets import make_circles

 n_samples = 200
diff --git a/examples/semi_supervised/plot_self_training_varying_threshold.py b/examples/semi_supervised/plot_self_training_varying_threshold.py
index 801e48b8411f5..2c7a485d06eb0 100644
--- a/examples/semi_supervised/plot_self_training_varying_threshold.py
+++ b/examples/semi_supervised/plot_self_training_varying_threshold.py
@@ -32,13 +32,14 @@
 # Authors: Oliver Rausch
 # License: BSD

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets
-from sklearn.svm import SVC
+from sklearn.metrics import accuracy_score
 from sklearn.model_selection import StratifiedKFold
 from sklearn.semi_supervised import SelfTrainingClassifier
-from sklearn.metrics import accuracy_score
+from sklearn.svm import SVC
 from sklearn.utils import shuffle

 n_splits = 3
diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py
index 609f5d10247c2..58c7f6e42f408 100644
--- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py
+++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py
@@ -15,15 +15,13 @@
 import numpy as np

 from sklearn.datasets import fetch_20newsgroups
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.feature_extraction.text import TfidfTransformer
-from sklearn.preprocessing import FunctionTransformer
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from sklearn.linear_model import SGDClassifier
+from sklearn.metrics import f1_score
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
-from sklearn.semi_supervised import SelfTrainingClassifier
-from sklearn.semi_supervised import LabelSpreading
-from sklearn.metrics import f1_score
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier

 # Loading dataset containing first five categories
 data = fetch_20newsgroups(
diff --git a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py
index 402cd41d6a0f2..766f7ea0a79c6 100644
--- a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py
+++ b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py
@@ -18,13 +18,12 @@
 # Oliver Rausch
 # License: BSD

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets
+from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier
 from sklearn.svm import SVC
-from sklearn.semi_supervised import LabelSpreading
-from sklearn.semi_supervised import SelfTrainingClassifier
-

 iris = datasets.load_iris()
diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py
index c2c3bc6e6ba28..cacd67ed056ac 100644
--- a/examples/svm/plot_custom_kernel.py
+++ b/examples/svm/plot_custom_kernel.py
@@ -8,9 +8,10 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn import svm, datasets
+import numpy as np
+
+from sklearn import datasets, svm
 from sklearn.inspection import DecisionBoundaryDisplay

 # import some data to play with
diff --git a/examples/svm/plot_iris_svc.py b/examples/svm/plot_iris_svc.py
index 5bcc81dd91d04..61aba3cc06602 100644
--- a/examples/svm/plot_iris_svc.py
+++ b/examples/svm/plot_iris_svc.py
@@ -35,9 +35,9 @@

 """

 import matplotlib.pyplot as plt
-from sklearn import svm, datasets
-from sklearn.inspection import DecisionBoundaryDisplay
+
+from sklearn import datasets, svm
+from sklearn.inspection import DecisionBoundaryDisplay

 # import some data to play with
 iris = datasets.load_iris()
diff --git a/examples/svm/plot_linearsvc_support_vectors.py b/examples/svm/plot_linearsvc_support_vectors.py
index 638579f36f3c3..60e9a3e6f32f9 100644
--- a/examples/svm/plot_linearsvc_support_vectors.py
+++ b/examples/svm/plot_linearsvc_support_vectors.py
@@ -9,11 +9,12 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import make_blobs
-from sklearn.svm import LinearSVC
 from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.svm import LinearSVC

 X, y = make_blobs(n_samples=40, centers=2, random_state=0)
diff --git a/examples/svm/plot_oneclass.py b/examples/svm/plot_oneclass.py
index 082cbcd6de2be..d4348fa0ec435 100644
--- a/examples/svm/plot_oneclass.py
+++ b/examples/svm/plot_oneclass.py
@@ -11,9 +11,10 @@

 """

-import numpy as np
-import matplotlib.pyplot as plt
 import matplotlib.font_manager
+import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import svm

 xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))
diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py
index fa4310134487a..ba0154b477b46 100644
--- a/examples/svm/plot_rbf_parameters.py
+++ b/examples/svm/plot_rbf_parameters.py
@@ -135,9 +135,8 @@ def __call__(self, value, clip=None):
 # 10 is often helpful. Using a basis of 2, a finer
 # tuning can be achieved but at a much higher cost.

+from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
 from sklearn.svm import SVC
-from sklearn.model_selection import StratifiedShuffleSplit
-from sklearn.model_selection import GridSearchCV

 C_range = np.logspace(-2, 10, 13)
 gamma_range = np.logspace(-9, 3, 13)
diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py
index 45bacff6a2b97..23f464169f516 100644
--- a/examples/svm/plot_separating_hyperplane.py
+++ b/examples/svm/plot_separating_hyperplane.py
@@ -10,11 +10,11 @@

 """

 import matplotlib.pyplot as plt
+
 from sklearn import svm
 from sklearn.datasets import make_blobs
 from sklearn.inspection import DecisionBoundaryDisplay
-

 # we create 40 separable points
 X, y = make_blobs(n_samples=40, centers=2, random_state=6)
diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py
index fe71420ffd0b3..6fd7de98f3fb6 100644
--- a/examples/svm/plot_separating_hyperplane_unbalanced.py
+++ b/examples/svm/plot_separating_hyperplane_unbalanced.py
@@ -26,6 +26,7 @@

 """

 import matplotlib.pyplot as plt
+
 from sklearn import svm
 from sklearn.datasets import make_blobs
 from sklearn.inspection import DecisionBoundaryDisplay
diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py
index 3652fae3e979a..3d5a934bf4884 100644
--- a/examples/svm/plot_svm_anova.py
+++ b/examples/svm/plot_svm_anova.py
@@ -14,6 +14,7 @@
 # Load some data to play with
 # ---------------------------
 import numpy as np
+
 from sklearn.datasets import load_iris

 X, y = load_iris(return_X_y=True)
@@ -25,8 +26,8 @@
 # %%
 # Create the pipeline
 # -------------------
-from sklearn.pipeline import Pipeline
 from sklearn.feature_selection import SelectPercentile, f_classif
+from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC

@@ -45,6 +46,7 @@
 # Plot the cross-validation score as a function of percentile of features
 # -----------------------------------------------------------------------
 import matplotlib.pyplot as plt
+
 from sklearn.model_selection import cross_val_score

 score_means = list()
diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py
index fac86e8a93c7a..7ff2486e1c867 100644
--- a/examples/svm/plot_svm_kernels.py
+++ b/examples/svm/plot_svm_kernels.py
@@ -13,10 +13,10 @@
 # Code source: Gaël Varoquaux
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn import svm
+import numpy as np

+from sklearn import svm

 # Our dataset and targets
 X = np.c_[
diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py
index f3717ecaa24ed..b8253264a4ad0 100644
--- a/examples/svm/plot_svm_margin.py
+++ b/examples/svm/plot_svm_margin.py
@@ -17,8 +17,9 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import svm

 # we create 40 separable points
diff --git a/examples/svm/plot_svm_nonlinear.py b/examples/svm/plot_svm_nonlinear.py
index f88231b4b6af4..4990e509661a1 100644
--- a/examples/svm/plot_svm_nonlinear.py
+++ b/examples/svm/plot_svm_nonlinear.py
@@ -11,8 +11,9 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import svm

 xx, yy = np.meshgrid(np.linspace(-3, 3, 500), np.linspace(-3, 3, 500))
diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py
index 75a16b571c3ea..ab34528a37af6 100644
--- a/examples/svm/plot_svm_regression.py
+++ b/examples/svm/plot_svm_regression.py
@@ -7,9 +7,10 @@

 """

+import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn.svm import SVR
-import matplotlib.pyplot as plt

 # %%
 # Generate sample data
diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py
index 4ba025cffac8e..1e44fb361e6ba 100644
--- a/examples/svm/plot_svm_scale_c.py
+++ b/examples/svm/plot_svm_scale_c.py
@@ -76,7 +76,8 @@
 # We will compute the mean test score for different values of `C`.
 import numpy as np
 import pandas as pd
-from sklearn.model_selection import validation_curve, ShuffleSplit
+
+from sklearn.model_selection import ShuffleSplit, validation_curve

 Cs = np.logspace(-2.3, -1.3, 10)
 train_sizes = np.linspace(0.3, 0.7, 3)
diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py
index 93148225b0bb3..848b81dee9c69 100644
--- a/examples/svm/plot_svm_tie_breaking.py
+++ b/examples/svm/plot_svm_tie_breaking.py
@@ -17,10 +17,11 @@
 # Code source: Andreas Mueller, Adrin Jalali
 # License: BSD 3 clause

-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.svm import SVC
+import numpy as np
+
 from sklearn.datasets import make_blobs
+from sklearn.svm import SVC

 X, y = make_blobs(random_state=27)
diff --git a/examples/svm/plot_weighted_samples.py b/examples/svm/plot_weighted_samples.py
index f346599300aba..c17742e091390 100644
--- a/examples/svm/plot_weighted_samples.py
+++ b/examples/svm/plot_weighted_samples.py
@@ -14,8 +14,9 @@

 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import svm
diff --git a/examples/text/plot_document_classification_20newsgroups.py b/examples/text/plot_document_classification_20newsgroups.py
index 633a0602d421b..04aad46c8451a 100644
--- a/examples/text/plot_document_classification_20newsgroups.py
+++ b/examples/text/plot_document_classification_20newsgroups.py
@@ -36,9 +36,10 @@
 # the classification problem "too easy". This is achieved using simple
 # heuristics that are neither perfect nor standard, hence disabled by default.

+from time import time
+
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import TfidfVectorizer
-from time import time

 categories = [
     "alt.atheism",
@@ -158,6 +159,7 @@ def load_dataset(verbose=False, remove=()):
 # in the classification errors.

 import matplotlib.pyplot as plt
+
 from sklearn.metrics import ConfusionMatrixDisplay

 fig, ax = plt.subplots(figsize=(10, 5))
@@ -182,8 +184,8 @@ def load_dataset(verbose=False, remove=()):
 # We can gain a deeper understanding of how this classifier makes its decisions
 # by looking at the words with the highest average feature effects:

-import pandas as pd
 import numpy as np
+import pandas as pd


 def plot_feature_effects():
@@ -315,8 +317,8 @@ def plot_feature_effects():
 # training time and testing time. For such purpose we define the following
 # benchmarking utilities:

-from sklearn.utils.extmath import density
 from sklearn import metrics
+from sklearn.utils.extmath import density


 def benchmark(clf, custom_name=False):
@@ -361,14 +363,11 @@ def benchmark(clf, custom_name=False):
 # :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`  # noqa: E501
 # for a demo on how such tuning can be done.

-from sklearn.linear_model import LogisticRegression
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import SGDClassifier
-from sklearn.naive_bayes import ComplementNB
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.neighbors import NearestCentroid
 from sklearn.ensemble import RandomForestClassifier
-
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.naive_bayes import ComplementNB
+from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
+from sklearn.svm import LinearSVC

 results = []
 for clf, name in (
diff --git a/examples/text/plot_document_clustering.py b/examples/text/plot_document_clustering.py
index 368cf7cea60ae..fa68b8bd312ea 100644
--- a/examples/text/plot_document_clustering.py
+++ b/examples/text/plot_document_clustering.py
@@ -46,6 +46,7 @@
 # strip those features and have a more sensible clustering problem.

 import numpy as np
+
 from sklearn.datasets import fetch_20newsgroups

 categories = [
@@ -104,9 +105,10 @@
 # For more reference, see :ref:`clustering_evaluation`.

 from collections import defaultdict
-from sklearn import metrics
 from time import time

+from sklearn import metrics
+
 evaluations = []
 evaluations_std = []
@@ -277,7 +279,6 @@ def fit_and_evaluate(km, X, name=None, n_runs=5):

 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import Normalizer
-

 lsa = make_pipeline(TruncatedSVD(n_components=100), Normalizer(copy=False))
 t0 = time()
 X_lsa = lsa.fit_transform(X_tfidf)
@@ -353,8 +354,7 @@ def fit_and_evaluate(km, X, name=None, n_runs=5):
 # case we also add LSA to the pipeline to reduce the dimension and sparcity of
 # the hashed vector space.

-from sklearn.feature_extraction.text import HashingVectorizer
-from sklearn.feature_extraction.text import TfidfTransformer
+from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer

 lsa_vectorizer = make_pipeline(
     HashingVectorizer(stop_words="english", n_features=50_000),
@@ -394,8 +394,8 @@ def fit_and_evaluate(km, X, name=None, n_runs=5):
 # Clustering evaluation summary
 # ==============================

-import pandas as pd
 import matplotlib.pyplot as plt
+import pandas as pd

 fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(16, 6), sharey=True)
diff --git a/examples/text/plot_hashing_vs_dict_vectorizer.py b/examples/text/plot_hashing_vs_dict_vectorizer.py
index 8200c646f69ee..ce2dcc2d13c41 100644
--- a/examples/text/plot_hashing_vs_dict_vectorizer.py
+++ b/examples/text/plot_hashing_vs_dict_vectorizer.py
@@ -118,6 +118,7 @@ def token_freqs(doc):
 # both of them receive dictionaries as input.

 from time import time
+
 from sklearn.feature_extraction import DictVectorizer

 dict_count_vectorizers = defaultdict(list)
diff --git a/examples/tree/plot_cost_complexity_pruning.py b/examples/tree/plot_cost_complexity_pruning.py
index d21d163c9a1e3..b232389ea9ded 100644
--- a/examples/tree/plot_cost_complexity_pruning.py
+++ b/examples/tree/plot_cost_complexity_pruning.py
@@ -18,8 +18,9 @@

 """

 import matplotlib.pyplot as plt
-from sklearn.model_selection import train_test_split
+
 from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeClassifier

 # %%
diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py
index 14f6506b5810f..b3d834da5d067 100644
--- a/examples/tree/plot_iris_dtc.py
+++ b/examples/tree/plot_iris_dtc.py
@@ -23,13 +23,12 @@
 # %%
 # Display the decision functions of trees trained on all pairs of features.
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np

 from sklearn.datasets import load_iris
-from sklearn.tree import DecisionTreeClassifier
 from sklearn.inspection import DecisionBoundaryDisplay
-
+from sklearn.tree import DecisionTreeClassifier

 # Parameters
 n_classes = 3
diff --git a/examples/tree/plot_tree_regression.py b/examples/tree/plot_tree_regression.py
index 6ed28a5cbfa99..5a3da0b7b6d06 100644
--- a/examples/tree/plot_tree_regression.py
+++ b/examples/tree/plot_tree_regression.py
@@ -15,9 +15,10 @@

 """

 # Import the necessary modules and libraries
+import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn.tree import DecisionTreeRegressor
-import matplotlib.pyplot as plt

 # Create a random dataset
 rng = np.random.RandomState(1)
diff --git a/examples/tree/plot_tree_regression_multioutput.py b/examples/tree/plot_tree_regression_multioutput.py
index a75652a6ddd56..b6d2800d2732d 100644
--- a/examples/tree/plot_tree_regression_multioutput.py
+++ b/examples/tree/plot_tree_regression_multioutput.py
@@ -15,8 +15,9 @@
 details of the training data and learn from the noise, i.e. they overfit.
 """

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.tree import DecisionTreeRegressor

 # Create a random dataset
diff --git a/examples/tree/plot_unveil_tree_structure.py b/examples/tree/plot_unveil_tree_structure.py
index 6313d0ccbb74f..d4009e3111f7f 100644
--- a/examples/tree/plot_unveil_tree_structure.py
+++ b/examples/tree/plot_unveil_tree_structure.py
@@ -19,10 +19,10 @@
 import numpy as np
 from matplotlib import pyplot as plt

-from sklearn.model_selection import train_test_split
+from sklearn import tree
 from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeClassifier
-from sklearn import tree

 ##############################################################################
 # Train tree classifier
diff --git a/maint_tools/check_pxd_in_installation.py b/maint_tools/check_pxd_in_installation.py
index ac1a8f9627a95..996d45d64d42a 100644
--- a/maint_tools/check_pxd_in_installation.py
+++ b/maint_tools/check_pxd_in_installation.py
@@ -6,12 +6,11 @@
 """

 import os
-import sys
 import pathlib
+import subprocess
+import sys
 import tempfile
 import textwrap
-import subprocess
-

 sklearn_dir = pathlib.Path(sys.argv[1])
 pxd_files = list(sklearn_dir.glob("**/*.pxd"))
diff --git a/maint_tools/sort_whats_new.py b/maint_tools/sort_whats_new.py
index 178e33bc87e5f..7241059176b66 100755
--- a/maint_tools/sort_whats_new.py
+++ b/maint_tools/sort_whats_new.py
@@ -2,8 +2,8 @@
 # Sorts what's new entries with per-module headings.
 # Pass what's new entries on stdin.

-import sys
 import re
+import sys
 from collections import defaultdict

 LABEL_ORDER = ["MajorFeature", "Feature", "Efficiency", "Enhancement", "Fix", "API"]
diff --git a/maint_tools/update_tracking_issue.py b/maint_tools/update_tracking_issue.py
index 4ddc9d1bfe8e6..725802416fb6c 100644
--- a/maint_tools/update_tracking_issue.py
+++ b/maint_tools/update_tracking_issue.py
@@ -11,10 +11,10 @@
 github account that does **not** have commit access to the public repo.
""" -from pathlib import Path -import sys import argparse +import sys from datetime import datetime, timezone +from pathlib import Path import defusedxml.ElementTree as ET from github import Github diff --git a/pyproject.toml b/pyproject.toml index bed85b074dbfb..efd72adf44392 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,9 @@ exclude = ''' ''' [tool.ruff] +# all rules can be found here: https://beta.ruff.rs/docs/rules/ +select = ["E", "F", "W", "I"] + # max line length for black line-length = 88 target-version = "py38" @@ -74,6 +77,7 @@ exclude=[ "examples/*"=["E402"] "doc/conf.py"=["E402"] + [tool.cython-lint] # Ignore the same error codes as ruff # + E501 (line too long) because keeping it < 88 in cython diff --git a/setup.py b/setup.py index 33d105a213a7c..5af738f5f841f 100755 --- a/setup.py +++ b/setup.py @@ -4,18 +4,17 @@ # 2010 Fabian Pedregosa # License: 3-clause BSD -import sys +import importlib import os -from os.path import join import platform import shutil +import sys +import traceback +from os.path import join from setuptools import Command, Extension, setup from setuptools.command.build_ext import build_ext -import traceback -import importlib - try: import builtins except ImportError: @@ -454,10 +453,10 @@ def configure_extension_modules(): if "sdist" in sys.argv or "--help" in sys.argv: return [] - from sklearn._build_utils import cythonize_extensions - from sklearn._build_utils import gen_from_templates import numpy + from sklearn._build_utils import cythonize_extensions, gen_from_templates + is_pypy = platform.python_implementation() == "PyPy" np_include = numpy.get_include() default_optimization_level = "O2" diff --git a/sklearn/__init__.py b/sklearn/__init__.py index d38a949d38208..c021d492fe061 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -12,13 +12,12 @@ See http://scikit-learn.org for complete documentation. """ -import sys import logging import os import random +import sys - -from ._config import get_config, set_config, config_context +from ._config import config_context, get_config, set_config logger = logging.getLogger(__name__) @@ -77,8 +76,10 @@ # It is necessary to do this prior to importing show_versions as the # later is linked to the OpenMP runtime to make it possible to introspect # it and importing it first would fail if the OpenMP dll cannot be found. - from . import _distributor_init # noqa: F401 - from . import __check_build # noqa: F401 + from . 
import ( + __check_build, # noqa: F401 + _distributor_init, # noqa: F401 + ) from .base import clone from .utils._show_versions import show_versions diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index f84dfa09a9f94..056215e162647 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -5,15 +5,15 @@ # license: BSD +import contextlib import os + import sklearn -import contextlib -from .pre_build_helpers import basic_check_build -from .openmp_helpers import check_openmp_support from .._min_dependencies import CYTHON_MIN_VERSION from ..externals._packaging.version import parse - +from .openmp_helpers import check_openmp_support +from .pre_build_helpers import basic_check_build DEFAULT_ROOT = "sklearn" diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py index c1d50abd3ae0c..f3eb054bb037e 100644 --- a/sklearn/_build_utils/pre_build_helpers.py +++ b/sklearn/_build_utils/pre_build_helpers.py @@ -1,11 +1,11 @@ """Helpers to check build environment before actual build of scikit-learn""" +import glob import os +import subprocess import sys -import glob import tempfile import textwrap -import subprocess from setuptools.command.build_ext import customize_compiler, new_compiler diff --git a/sklearn/_config.py b/sklearn/_config.py index 43755071e54e9..e84dc9ef5b228 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -1,8 +1,8 @@ """Global configuration state and functions for management """ import os -from contextlib import contextmanager as contextmanager import threading +from contextlib import contextmanager as contextmanager _global_config = { "assume_finite": bool(os.environ.get("SKLEARN_ASSUME_FINITE", False)), diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py index 78b1eb8543c8d..ee15e693c16f6 100644 --- a/sklearn/_loss/__init__.py +++ b/sklearn/_loss/__init__.py @@ -4,19 +4,18 @@ """ from .loss import ( - HalfSquaredError, AbsoluteError, - PinballLoss, - HuberLoss, - HalfPoissonLoss, + HalfBinomialLoss, HalfGammaLoss, + HalfMultinomialLoss, + HalfPoissonLoss, + HalfSquaredError, HalfTweedieLoss, HalfTweedieLossIdentity, - HalfBinomialLoss, - HalfMultinomialLoss, + HuberLoss, + PinballLoss, ) - __all__ = [ "HalfSquaredError", "AbsoluteError", diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py index 510ef80c641fc..9459844f6b89a 100644 --- a/sklearn/_loss/link.py +++ b/sklearn/_loss/link.py @@ -9,6 +9,7 @@ import numpy as np from scipy.special import expit, logit from scipy.stats import gmean + from ..utils.extmath import softmax diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py index 037d933aa5491..f3b61da0915d5 100644 --- a/sklearn/_loss/loss.py +++ b/sklearn/_loss/loss.py @@ -16,31 +16,33 @@ # - Replace link module of GLMs. 
import numbers + import numpy as np from scipy.special import xlogy + +from ..utils import check_scalar +from ..utils.stats import _weighted_percentile from ._loss import ( - CyHalfSquaredError, CyAbsoluteError, - CyPinballLoss, - CyHuberLoss, - CyHalfPoissonLoss, + CyExponentialLoss, + CyHalfBinomialLoss, CyHalfGammaLoss, + CyHalfMultinomialLoss, + CyHalfPoissonLoss, + CyHalfSquaredError, CyHalfTweedieLoss, CyHalfTweedieLossIdentity, - CyHalfBinomialLoss, - CyHalfMultinomialLoss, - CyExponentialLoss, + CyHuberLoss, + CyPinballLoss, ) from .link import ( - Interval, + HalfLogitLink, IdentityLink, - LogLink, + Interval, LogitLink, - HalfLogitLink, + LogLink, MultinomialLogit, ) -from ..utils import check_scalar -from ..utils.stats import _weighted_percentile # Note: The shape of raw_prediction for multiclass classifications is diff --git a/sklearn/_loss/tests/test_link.py b/sklearn/_loss/tests/test_link.py index 8421fd3fd7a77..e5a665f8d48ac 100644 --- a/sklearn/_loss/tests/test_link.py +++ b/sklearn/_loss/tests/test_link.py @@ -1,16 +1,15 @@ import numpy as np -from numpy.testing import assert_allclose, assert_array_equal import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn._loss.link import ( _LINKS, - _inclusive_low_high, HalfLogitLink, - MultinomialLogit, Interval, + MultinomialLogit, + _inclusive_low_high, ) - LINK_FUNCTIONS = list(_LINKS.values()) diff --git a/sklearn/_loss/tests/test_loss.py b/sklearn/_loss/tests/test_loss.py index dbfe5b3829dda..d279a2f06a182 100644 --- a/sklearn/_loss/tests/test_loss.py +++ b/sklearn/_loss/tests/test_loss.py @@ -1,22 +1,22 @@ import pickle import numpy as np -from numpy.testing import assert_allclose, assert_array_equal import pytest +from numpy.testing import assert_allclose, assert_array_equal from pytest import approx from scipy.optimize import ( + LinearConstraint, minimize, minimize_scalar, newton, - LinearConstraint, ) from scipy.special import logsumexp -from sklearn._loss.link import _inclusive_low_high, IdentityLink +from sklearn._loss.link import IdentityLink, _inclusive_low_high from sklearn._loss.loss import ( _LOSSES, - BaseLoss, AbsoluteError, + BaseLoss, HalfBinomialLoss, HalfGammaLoss, HalfMultinomialLoss, @@ -30,7 +30,6 @@ from sklearn.utils import assert_all_finite from sklearn.utils._testing import create_memmap_backed_data, skip_if_32bit - ALL_LOSSES = list(_LOSSES.values()) LOSS_INSTANCES = [loss() for loss in ALL_LOSSES] diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index 72ee14d64e958..e12720dbd5b94 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -1,8 +1,7 @@ """All minimum dependencies for scikit-learn.""" -from collections import defaultdict -import platform import argparse - +import platform +from collections import defaultdict # scipy and cython should be in sync with pyproject.toml diff --git a/sklearn/base.py b/sklearn/base.py index 40bf041a30c13..e62a0a01214bf 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -5,33 +5,36 @@ import copy import functools -import warnings -from collections import defaultdict -import platform import inspect +import platform import re +import warnings +from collections import defaultdict import numpy as np from . 
import __version__ -from ._config import get_config, config_context +from ._config import config_context, get_config +from .exceptions import InconsistentVersionWarning from .utils import _IS_32BIT +from .utils._estimator_html_repr import estimator_html_repr +from .utils._metadata_requests import _MetadataRequester +from .utils._param_validation import validate_parameter_constraints from .utils._set_output import _SetOutputMixin from .utils._tags import ( _DEFAULT_TAGS, ) -from .exceptions import InconsistentVersionWarning -from .utils.validation import check_X_y -from .utils.validation import check_array -from .utils.validation import _check_y -from .utils.validation import _num_features -from .utils.validation import _check_feature_names_in -from .utils.validation import _generate_get_feature_names_out -from .utils.validation import _is_fitted, check_is_fitted -from .utils._metadata_requests import _MetadataRequester -from .utils.validation import _get_feature_names -from .utils._estimator_html_repr import estimator_html_repr -from .utils._param_validation import validate_parameter_constraints +from .utils.validation import ( + _check_feature_names_in, + _check_y, + _generate_get_feature_names_out, + _get_feature_names, + _is_fitted, + _num_features, + check_array, + check_is_fitted, + check_X_y, +) def clone(estimator, *, safe=True): diff --git a/sklearn/calibration.py b/sklearn/calibration.py index e4869387f4166..42df0b3248733 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -7,43 +7,51 @@ # # License: BSD 3 clause -from numbers import Integral, Real import warnings -from inspect import signature from functools import partial - +from inspect import signature from math import log -import numpy as np +from numbers import Integral, Real -from scipy.special import expit -from scipy.special import xlogy +import numpy as np from scipy.optimize import fmin_bfgs +from scipy.special import expit, xlogy + +from sklearn.utils import Bunch from .base import ( BaseEstimator, ClassifierMixin, - RegressorMixin, - clone, MetaEstimatorMixin, + RegressorMixin, _fit_context, + clone, ) -from .preprocessing import label_binarize, LabelEncoder +from .isotonic import IsotonicRegression +from .model_selection import check_cv, cross_val_predict +from .preprocessing import LabelEncoder, label_binarize +from .svm import LinearSVC from .utils import ( + _safe_indexing, column_or_1d, indexable, - _safe_indexing, ) - -from .utils.multiclass import check_classification_targets -from .utils.parallel import delayed, Parallel from .utils._param_validation import ( - StrOptions, HasMethods, Hidden, - validate_params, Interval, + StrOptions, + validate_params, ) from .utils._plotting import _BinaryClassifierCurveDisplayMixin +from .utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + _routing_enabled, + process_routing, +) +from .utils.multiclass import check_classification_targets +from .utils.parallel import Parallel, delayed from .utils.validation import ( _check_fit_params, _check_pos_label_consistency, @@ -52,16 +60,6 @@ check_consistent_length, check_is_fitted, ) -from .isotonic import IsotonicRegression -from .svm import LinearSVC -from .model_selection import check_cv, cross_val_predict -from sklearn.utils import Bunch -from .utils.metadata_routing import ( - MetadataRouter, - MethodMapping, - process_routing, - _routing_enabled, -) class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): diff --git a/sklearn/cluster/__init__.py 
b/sklearn/cluster/__init__.py index 40b89ea0da8ba..f5d3104d816bf 100644 --- a/sklearn/cluster/__init__.py +++ b/sklearn/cluster/__init__.py @@ -3,27 +3,27 @@ algorithms. """ -from ._spectral import spectral_clustering, SpectralClustering -from ._mean_shift import mean_shift, MeanShift, estimate_bandwidth, get_bin_seeds -from ._affinity_propagation import affinity_propagation, AffinityPropagation +from ._affinity_propagation import AffinityPropagation, affinity_propagation from ._agglomerative import ( - ward_tree, AgglomerativeClustering, - linkage_tree, FeatureAgglomeration, + linkage_tree, + ward_tree, ) -from ._kmeans import k_means, KMeans, MiniBatchKMeans, kmeans_plusplus +from ._bicluster import SpectralBiclustering, SpectralCoclustering +from ._birch import Birch from ._bisect_k_means import BisectingKMeans -from ._dbscan import dbscan, DBSCAN +from ._dbscan import DBSCAN, dbscan +from ._hdbscan.hdbscan import HDBSCAN +from ._kmeans import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus +from ._mean_shift import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift from ._optics import ( OPTICS, cluster_optics_dbscan, - compute_optics_graph, cluster_optics_xi, + compute_optics_graph, ) -from ._bicluster import SpectralBiclustering, SpectralCoclustering -from ._birch import Birch -from ._hdbscan.hdbscan import HDBSCAN +from ._spectral import SpectralClustering, spectral_clustering __all__ = [ "AffinityPropagation", diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 1ffc5f07e8c50..6e7f67fed5199 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -5,20 +5,18 @@ # License: BSD 3 clause -from numbers import Integral, Real import warnings +from numbers import Integral, Real import numpy as np +from .._config import config_context +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..exceptions import ConvergenceWarning -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context +from ..metrics import euclidean_distances, pairwise_distances_argmin from ..utils import check_random_state from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.validation import check_is_fitted -from ..metrics import euclidean_distances -from ..metrics import pairwise_distances_argmin -from .._config import config_context def _equal_similarities_and_preferences(S, preference): diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index b7d08a45dcd80..553908104c92b 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -15,22 +15,25 @@ from scipy import sparse from scipy.sparse.csgraph import connected_components -from ..base import BaseEstimator, ClusterMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ..metrics.pairwise import paired_distances -from ..metrics.pairwise import _VALID_METRICS +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + ClusterMixin, + _fit_context, +) from ..metrics import DistanceMetric from ..metrics._dist_metrics import METRIC_MAPPING64 +from ..metrics.pairwise import _VALID_METRICS, paired_distances from ..utils import check_array from ..utils._fast_dict import IntFloatDict -from ..utils.graph import _fix_connected_components from ..utils._param_validation import ( + HasMethods, Hidden, Interval, StrOptions, - HasMethods, validate_params, ) +from ..utils.graph import _fix_connected_components 
from ..utils.validation import check_memory # mypy error: Module 'sklearn.cluster' has no attribute '_hierarchical_fast' diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index 4133264626ebb..65280c06319d9 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -3,25 +3,19 @@ # License: BSD 3 clause from abc import ABCMeta, abstractmethod - -import numpy as np from numbers import Integral +import numpy as np from scipy.linalg import norm from scipy.sparse import dia_matrix, issparse from scipy.sparse.linalg import eigsh, svds -from . import KMeans, MiniBatchKMeans -from ..base import BaseEstimator, BiclusterMixin -from ..base import _fit_context -from ..utils import check_random_state -from ..utils import check_scalar - +from ..base import BaseEstimator, BiclusterMixin, _fit_context +from ..utils import check_random_state, check_scalar +from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import make_nonnegative, randomized_svd, safe_sparse_dot - from ..utils.validation import assert_all_finite -from ..utils._param_validation import Interval, StrOptions - +from ._kmeans import KMeans, MiniBatchKMeans __all__ = ["SpectralCoclustering", "SpectralBiclustering"] diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index e74630572a014..d62fb880ba8b2 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -4,26 +4,27 @@ # License: BSD 3 clause import warnings -import numpy as np +from math import sqrt from numbers import Integral, Real + +import numpy as np from scipy import sparse -from math import sqrt -from ..metrics import pairwise_distances_argmin -from ..metrics.pairwise import euclidean_distances +from .._config import config_context from ..base import ( - TransformerMixin, - ClusterMixin, BaseEstimator, ClassNamePrefixFeaturesOutMixin, + ClusterMixin, + TransformerMixin, _fit_context, ) -from ..utils.extmath import row_norms +from ..exceptions import ConvergenceWarning +from ..metrics import pairwise_distances_argmin +from ..metrics.pairwise import euclidean_distances from ..utils._param_validation import Interval +from ..utils.extmath import row_norms from ..utils.validation import check_is_fitted -from ..exceptions import ConvergenceWarning from . 
import AgglomerativeClustering -from .._config import config_context def _iterate_sparse_X(X): diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index 959d78ae85009..9091445261f70 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -7,18 +7,17 @@ import scipy.sparse as sp from ..base import _fit_context -from ._kmeans import _BaseKMeans -from ._kmeans import _kmeans_single_elkan -from ._kmeans import _kmeans_single_lloyd -from ._kmeans import _labels_inertia_threadpool_limit -from ._k_means_common import _inertia_dense -from ._k_means_common import _inertia_sparse -from ..utils.extmath import row_norms from ..utils._openmp_helpers import _openmp_effective_n_threads -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..utils.validation import check_random_state from ..utils._param_validation import StrOptions +from ..utils.extmath import row_norms +from ..utils.validation import _check_sample_weight, check_is_fitted, check_random_state +from ._k_means_common import _inertia_dense, _inertia_sparse +from ._kmeans import ( + _BaseKMeans, + _kmeans_single_elkan, + _kmeans_single_lloyd, + _labels_inertia_threadpool_limit, +) class _BisectingTree: diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 3c753935ac046..e3ba62dbfdf01 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -14,12 +14,11 @@ import numpy as np from scipy import sparse +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..metrics.pairwise import _VALID_METRICS -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..utils.validation import _check_sample_weight -from ..utils._param_validation import Interval, StrOptions from ..neighbors import NearestNeighbors +from ..utils._param_validation import Interval, StrOptions +from ..utils.validation import _check_sample_weight from ._dbscan_inner import dbscan_inner diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index 55baf247a2931..f84f18c1c18b3 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -6,12 +6,13 @@ # License: BSD 3 clause import warnings + import numpy as np +from scipy.sparse import issparse from ..base import TransformerMixin -from ..utils.validation import check_is_fitted from ..utils import metadata_routing -from scipy.sparse import issparse +from ..utils.validation import check_is_fitted ############################################################################### # Mixin class for feature agglomeration. 
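The hunks above and below all apply one mechanical convention, matching the isort-style "I" rules this patch enables in pyproject.toml via select = ["E", "F", "W", "I"]: imports are regrouped into standard-library, third-party, and first-party/local sections separated by a single blank line; within a section, plain import statements come before from-imports and modules are alphabetized; repeated from-imports of the same module are merged, with the imported names sorted in ASCII order (uppercase before lowercase). A minimal sketch of the resulting layout on a hypothetical module; the particular imports are illustrative, not taken from any single hunk:

# Standard library: plain imports first, then "from" imports, alphabetized.
import os
import sys
from collections import defaultdict

# One blank line, then third-party packages in the same order.
import numpy as np
from scipy import sparse

# One blank line, then first-party (sklearn) imports; names in a merged
# "from" import sort in ASCII order, so DBSCAN precedes dbscan.
from sklearn.cluster import DBSCAN, dbscan
from sklearn.utils._testing import assert_allclose, assert_array_equal

Running ruff with the "I" rules selected and --fix (for example, ruff check --select I --fix .) should reproduce this ordering on a checkout of the same revision.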
diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py index f1584f46d6f82..fa6c1950b1164 100644 --- a/sklearn/cluster/_hdbscan/hdbscan.py +++ b/sklearn/cluster/_hdbscan/hdbscan.py @@ -46,16 +46,15 @@ from ...metrics._dist_metrics import DistanceMetric from ...neighbors import BallTree, KDTree, NearestNeighbors from ...utils._param_validation import Interval, StrOptions -from ...utils.validation import _assert_all_finite, _allclose_dense_sparse -from ._reachability import mutual_reachability_graph +from ...utils.validation import _allclose_dense_sparse, _assert_all_finite from ._linkage import ( + MST_edge_dtype, make_single_linkage, - mst_from_mutual_reachability, mst_from_data_matrix, - MST_edge_dtype, + mst_from_mutual_reachability, ) -from ._tree import tree_to_labels, labelling_at_cut -from ._tree import HIERARCHY_dtype +from ._reachability import mutual_reachability_graph +from ._tree import HIERARCHY_dtype, labelling_at_cut, tree_to_labels FAST_METRICS = set(KDTree.valid_metrics() + BallTree.valid_metrics()) diff --git a/sklearn/cluster/_hdbscan/tests/test_reachibility.py b/sklearn/cluster/_hdbscan/tests/test_reachibility.py index c8ba28d0af25b..c25b6baf4b65c 100644 --- a/sklearn/cluster/_hdbscan/tests/test_reachibility.py +++ b/sklearn/cluster/_hdbscan/tests/test_reachibility.py @@ -1,13 +1,12 @@ import numpy as np import pytest +from sklearn.cluster._hdbscan._reachability import mutual_reachability_graph from sklearn.utils._testing import ( _convert_container, assert_allclose, ) -from sklearn.cluster._hdbscan._reachability import mutual_reachability_graph - def test_mutual_reachability_graph_error_sparse_format(): """Check that we raise an error if the sparse format is not CSR.""" diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index b36999885a14e..79aa8b3825170 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -11,50 +11,48 @@ # Robert Layton # License: BSD 3 clause +import warnings from abc import ABC, abstractmethod from numbers import Integral, Real -import warnings import numpy as np import scipy.sparse as sp from ..base import ( BaseEstimator, + ClassNamePrefixFeaturesOutMixin, ClusterMixin, TransformerMixin, - ClassNamePrefixFeaturesOutMixin, _fit_context, ) -from ..metrics.pairwise import euclidean_distances -from ..metrics.pairwise import _euclidean_distances +from ..exceptions import ConvergenceWarning +from ..metrics.pairwise import _euclidean_distances, euclidean_distances +from ..utils import check_array, check_random_state +from ..utils._openmp_helpers import _openmp_effective_n_threads +from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params from ..utils.extmath import row_norms, stable_cumsum -from ..utils.fixes import threadpool_limits -from ..utils.fixes import threadpool_info -from ..utils.sparsefuncs_fast import assign_rows_csr +from ..utils.fixes import threadpool_info, threadpool_limits from ..utils.sparsefuncs import mean_variance_axis -from ..utils import check_array -from ..utils import check_random_state -from ..utils.validation import check_is_fitted, _check_sample_weight -from ..utils.validation import _is_arraylike_not_scalar -from ..utils._param_validation import Hidden -from ..utils._param_validation import Interval -from ..utils._param_validation import StrOptions -from ..utils._param_validation import validate_params -from ..utils._openmp_helpers import _openmp_effective_n_threads -from ..exceptions import ConvergenceWarning -from 
._k_means_common import CHUNK_SIZE -from ._k_means_common import _inertia_dense -from ._k_means_common import _inertia_sparse -from ._k_means_common import _is_same_clustering -from ._k_means_minibatch import _minibatch_update_dense -from ._k_means_minibatch import _minibatch_update_sparse -from ._k_means_lloyd import lloyd_iter_chunked_dense -from ._k_means_lloyd import lloyd_iter_chunked_sparse -from ._k_means_elkan import init_bounds_dense -from ._k_means_elkan import init_bounds_sparse -from ._k_means_elkan import elkan_iter_chunked_dense -from ._k_means_elkan import elkan_iter_chunked_sparse - +from ..utils.sparsefuncs_fast import assign_rows_csr +from ..utils.validation import ( + _check_sample_weight, + _is_arraylike_not_scalar, + check_is_fitted, +) +from ._k_means_common import ( + CHUNK_SIZE, + _inertia_dense, + _inertia_sparse, + _is_same_clustering, +) +from ._k_means_elkan import ( + elkan_iter_chunked_dense, + elkan_iter_chunked_sparse, + init_bounds_dense, + init_bounds_sparse, +) +from ._k_means_lloyd import lloyd_iter_chunked_dense, lloyd_iter_chunked_sparse +from ._k_means_minibatch import _minibatch_update_dense, _minibatch_update_sparse ############################################################################### # Initialization heuristic diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index 6b0f227d011f9..ab9b8e85beadb 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -14,20 +14,20 @@ # Gael Varoquaux # Martino Sorbaro -import numpy as np import warnings +from collections import defaultdict from numbers import Integral, Real -from collections import defaultdict +import numpy as np + +from .._config import config_context +from ..base import BaseEstimator, ClusterMixin, _fit_context +from ..metrics.pairwise import pairwise_distances_argmin +from ..neighbors import NearestNeighbors +from ..utils import check_array, check_random_state, gen_batches from ..utils._param_validation import Interval, validate_params +from ..utils.parallel import Parallel, delayed from ..utils.validation import check_is_fitted -from ..utils.parallel import delayed, Parallel -from ..utils import check_random_state, gen_batches, check_array -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..neighbors import NearestNeighbors -from ..metrics.pairwise import pairwise_distances_argmin -from .._config import config_context @validate_params( diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index ca1c74d6f44e7..8a91cd6f5a383 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -10,23 +10,26 @@ License: BSD 3 clause """ +import warnings from numbers import Integral, Real -import warnings import numpy as np +from scipy.sparse import SparseEfficiencyWarning, issparse +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..exceptions import DataConversionWarning -from ..metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS -from ..metrics.pairwise import _VALID_METRICS +from ..metrics import pairwise_distances +from ..metrics.pairwise import _VALID_METRICS, PAIRWISE_BOOLEAN_FUNCTIONS +from ..neighbors import NearestNeighbors from ..utils import gen_batches, get_chunk_n_rows -from ..utils._param_validation import Interval, HasMethods, StrOptions, validate_params -from ..utils._param_validation import RealNotInt +from ..utils._param_validation import ( + HasMethods, + Interval, + RealNotInt, + StrOptions, + validate_params, +) from ..utils.validation import 
check_memory -from ..neighbors import NearestNeighbors -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..metrics import pairwise_distances -from scipy.sparse import issparse, SparseEfficiencyWarning class OPTICS(ClusterMixin, BaseEstimator): diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index f72db4b7c1da3..d5fc9d4fdc68f 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -6,21 +6,19 @@ # Andrew Knyazev # License: BSD 3 clause -from numbers import Integral, Real import warnings +from numbers import Integral, Real import numpy as np - from scipy.linalg import LinAlgError, qr, svd from scipy.sparse import csc_matrix -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..utils._param_validation import Interval, StrOptions, validate_params -from ..utils import check_random_state, as_float_array -from ..metrics.pairwise import pairwise_kernels, KERNEL_PARAMS -from ..neighbors import kneighbors_graph, NearestNeighbors +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..manifold import spectral_embedding +from ..metrics.pairwise import KERNEL_PARAMS, pairwise_kernels +from ..neighbors import NearestNeighbors, kneighbors_graph +from ..utils import as_float_array, check_random_state +from ..utils._param_validation import Interval, StrOptions, validate_params from ._kmeans import k_means diff --git a/sklearn/cluster/tests/common.py b/sklearn/cluster/tests/common.py index 0f4bd9e14926d..b1fe047fe230a 100644 --- a/sklearn/cluster/tests/common.py +++ b/sklearn/cluster/tests/common.py @@ -5,7 +5,6 @@ import numpy as np - ############################################################################### # Generate sample data diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index 52007c375f667..136d2fe6fd781 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -3,20 +3,18 @@ """ -import numpy as np -import pytest import warnings +import numpy as np +import pytest from scipy.sparse import csr_matrix -from sklearn.exceptions import ConvergenceWarning, NotFittedError -from sklearn.utils._testing import assert_array_equal, assert_allclose - -from sklearn.cluster import AffinityPropagation +from sklearn.cluster import AffinityPropagation, affinity_propagation from sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences -from sklearn.cluster import affinity_propagation from sklearn.datasets import make_blobs +from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics import euclidean_distances +from sklearn.utils._testing import assert_allclose, assert_array_equal n_clusters = 3 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 0a68e97d6fb22..6d4a1067c4048 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -4,23 +4,21 @@ import pytest from scipy.sparse import csr_matrix, issparse -from sklearn.model_selection import ParameterGrid - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - from sklearn.base import BaseEstimator, BiclusterMixin - -from sklearn.cluster import SpectralCoclustering -from sklearn.cluster 
import SpectralBiclustering -from sklearn.cluster._bicluster import _scale_normalize -from sklearn.cluster._bicluster import _bistochastic_normalize -from sklearn.cluster._bicluster import _log_normalize - -from sklearn.metrics import consensus_score, v_measure_score - +from sklearn.cluster import SpectralBiclustering, SpectralCoclustering +from sklearn.cluster._bicluster import ( + _bistochastic_normalize, + _log_normalize, + _scale_normalize, +) from sklearn.datasets import make_biclusters, make_checkerboard +from sklearn.metrics import consensus_score, v_measure_score +from sklearn.model_selection import ParameterGrid +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) class MockBiclustering(BiclusterMixin, BaseEstimator): diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index c2f3c06d15ba7..7fb83f0803f02 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -2,19 +2,16 @@ Tests for the birch clustering algorithm. """ -from scipy import sparse import numpy as np import pytest +from scipy import sparse +from sklearn.cluster import AgglomerativeClustering, Birch from sklearn.cluster.tests.common import generate_clustered_data -from sklearn.cluster import Birch -from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import make_blobs from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import pairwise_distances_argmin, v_measure_score - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_allclose, assert_array_equal def test_n_samples_leaves_roots(global_random_seed, global_dtype): diff --git a/sklearn/cluster/tests/test_bisect_k_means.py b/sklearn/cluster/tests/test_bisect_k_means.py index c79cd0bcca3e8..01afd4be9c8b5 100644 --- a/sklearn/cluster/tests/test_bisect_k_means.py +++ b/sklearn/cluster/tests/test_bisect_k_means.py @@ -2,9 +2,9 @@ import pytest import scipy.sparse as sp -from sklearn.utils._testing import assert_array_equal, assert_allclose from sklearn.cluster import BisectingKMeans from sklearn.metrics import v_measure_score +from sklearn.utils._testing import assert_allclose, assert_array_equal @pytest.mark.parametrize("bisecting_strategy", ["biggest_inertia", "largest_cluster"]) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index f36eb19caeb0f..972820c6cc137 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -3,23 +3,18 @@ """ import pickle - -import numpy as np - import warnings -from scipy.spatial import distance -from scipy import sparse - +import numpy as np import pytest +from scipy import sparse +from scipy.spatial import distance -from sklearn.utils._testing import assert_array_equal -from sklearn.neighbors import NearestNeighbors -from sklearn.cluster import DBSCAN -from sklearn.cluster import dbscan +from sklearn.cluster import DBSCAN, dbscan from sklearn.cluster.tests.common import generate_clustered_data from sklearn.metrics.pairwise import pairwise_distances - +from sklearn.neighbors import NearestNeighbors +from sklearn.utils._testing import assert_array_equal n_clusters = 3 X = generate_clustered_data(n_clusters=n_clusters) diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py index 3db2862384c74..121e8f2cfe400 100644 --- 
a/sklearn/cluster/tests/test_feature_agglomeration.py +++ b/sklearn/cluster/tests/test_feature_agglomeration.py @@ -3,13 +3,14 @@ """ # Authors: Sergul Aydore 2017 import warnings -import numpy as np -from numpy.testing import assert_array_equal +import numpy as np import pytest +from numpy.testing import assert_array_equal + from sklearn.cluster import FeatureAgglomeration -from sklearn.utils._testing import assert_array_almost_equal from sklearn.datasets import make_blobs +from sklearn.utils._testing import assert_array_almost_equal def test_feature_agglomeration(): diff --git a/sklearn/cluster/tests/test_hdbscan.py b/sklearn/cluster/tests/test_hdbscan.py index b652a99aa221f..d1ff6452a5a08 100644 --- a/sklearn/cluster/tests/test_hdbscan.py +++ b/sklearn/cluster/tests/test_hdbscan.py @@ -8,6 +8,12 @@ from scipy.spatial import distance from sklearn.cluster import HDBSCAN +from sklearn.cluster._hdbscan._tree import ( + CONDENSED_dtype, + _condense_tree, + _do_labelling, +) +from sklearn.cluster._hdbscan.hdbscan import _OUTLIER_ENCODING from sklearn.datasets import make_blobs from sklearn.metrics import fowlkes_mallows_score from sklearn.metrics.pairwise import _VALID_METRICS, euclidean_distances @@ -15,12 +21,6 @@ from sklearn.preprocessing import StandardScaler from sklearn.utils import shuffle from sklearn.utils._testing import assert_allclose, assert_array_equal -from sklearn.cluster._hdbscan.hdbscan import _OUTLIER_ENCODING -from sklearn.cluster._hdbscan._tree import ( - _do_labelling, - _condense_tree, - CONDENSED_dtype, -) n_clusters_true = 3 X, y = make_blobs(n_samples=200, random_state=10) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index acaf3c27bedb1..95f28413d132d 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -6,48 +6,48 @@ # Matteo Visconti di Oleggio Castello 2014 # License: BSD 3 clause import itertools -from tempfile import mkdtemp import shutil -import pytest from functools import partial +from tempfile import mkdtemp import numpy as np +import pytest from scipy import sparse from scipy.cluster import hierarchy from scipy.sparse.csgraph import connected_components -from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS -from sklearn.utils._testing import assert_almost_equal, create_memmap_backed_data -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings - -from sklearn.cluster import ward_tree -from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration +from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration, ward_tree from sklearn.cluster._agglomerative import ( - _hc_cut, _TREE_BUILDERS, - linkage_tree, _fix_connectivity, + _hc_cut, + linkage_tree, +) +from sklearn.cluster._hierarchical_fast import ( + average_merge, + max_merge, + mst_linkage_core, ) +from sklearn.datasets import make_circles, make_moons from sklearn.feature_extraction.image import grid_to_graph from sklearn.metrics import DistanceMetric +from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score from sklearn.metrics.pairwise import ( PAIRED_DISTANCES, cosine_distances, manhattan_distances, pairwise_distances, ) -from sklearn.metrics.cluster import normalized_mutual_info_score +from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS from sklearn.neighbors import 
kneighbors_graph -from sklearn.cluster._hierarchical_fast import ( - average_merge, - max_merge, - mst_linkage_core, -) from sklearn.utils._fast_dict import IntFloatDict -from sklearn.utils._testing import assert_array_equal -from sklearn.datasets import make_moons, make_circles +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + create_memmap_backed_data, + ignore_warnings, +) def test_linkage_misc(): diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index c11d5dd3165c0..a61f548ba11a0 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -2,37 +2,36 @@ import re import sys import warnings +from io import StringIO import numpy as np -from scipy import sparse as sp - import pytest +from scipy import sparse as sp -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils.fixes import threadpool_limits from sklearn.base import clone +from sklearn.cluster import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus +from sklearn.cluster._k_means_common import ( + _euclidean_dense_dense_wrapper, + _euclidean_sparse_dense_wrapper, + _inertia_dense, + _inertia_sparse, + _is_same_clustering, + _relocate_empty_clusters_dense, + _relocate_empty_clusters_sparse, +) +from sklearn.cluster._kmeans import _labels_inertia, _mini_batch_step +from sklearn.datasets import make_blobs from sklearn.exceptions import ConvergenceWarning - -from sklearn.utils.extmath import row_norms -from sklearn.metrics import pairwise_distances -from sklearn.metrics import pairwise_distances_argmin -from sklearn.metrics.pairwise import euclidean_distances +from sklearn.metrics import pairwise_distances, pairwise_distances_argmin from sklearn.metrics.cluster import v_measure_score -from sklearn.cluster import KMeans, k_means, kmeans_plusplus -from sklearn.cluster import MiniBatchKMeans -from sklearn.cluster._kmeans import _labels_inertia -from sklearn.cluster._kmeans import _mini_batch_step -from sklearn.cluster._k_means_common import _relocate_empty_clusters_dense -from sklearn.cluster._k_means_common import _relocate_empty_clusters_sparse -from sklearn.cluster._k_means_common import _euclidean_dense_dense_wrapper -from sklearn.cluster._k_means_common import _euclidean_sparse_dense_wrapper -from sklearn.cluster._k_means_common import _inertia_dense -from sklearn.cluster._k_means_common import _inertia_sparse -from sklearn.cluster._k_means_common import _is_same_clustering -from sklearn.utils._testing import create_memmap_backed_data -from sklearn.datasets import make_blobs -from io import StringIO +from sklearn.metrics.pairwise import euclidean_distances +from sklearn.utils._testing import ( + assert_allclose, + assert_array_equal, + create_memmap_backed_data, +) +from sklearn.utils.extmath import row_norms +from sklearn.utils.fixes import threadpool_limits # TODO(1.4): Remove msg = ( diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index db13e4d18650f..265c72d0c4ce1 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -3,20 +3,15 @@ """ -import numpy as np import warnings -import pytest -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose +import numpy as np +import pytest -from sklearn.cluster import MeanShift -from sklearn.cluster import mean_shift -from 
sklearn.cluster import estimate_bandwidth -from sklearn.cluster import get_bin_seeds +from sklearn.cluster import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift from sklearn.datasets import make_blobs from sklearn.metrics import v_measure_score - +from sklearn.utils._testing import assert_allclose, assert_array_equal n_clusters = 3 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 0acf818912c0f..d7bf4034ab98a 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -1,24 +1,21 @@ # Authors: Shane Grigsby # Adrin Jalali # License: BSD 3 clause +import warnings + import numpy as np import pytest from scipy import sparse -import warnings -from sklearn.datasets import make_blobs -from sklearn.cluster import OPTICS +from sklearn.cluster import DBSCAN, OPTICS from sklearn.cluster._optics import _extend_region, _extract_xi_labels -from sklearn.exceptions import DataConversionWarning +from sklearn.cluster.tests.common import generate_clustered_data +from sklearn.datasets import make_blobs +from sklearn.exceptions import DataConversionWarning, EfficiencyWarning from sklearn.metrics.cluster import contingency_matrix from sklearn.metrics.pairwise import pairwise_distances -from sklearn.cluster import DBSCAN from sklearn.utils import shuffle -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.exceptions import EfficiencyWarning -from sklearn.cluster.tests.common import generate_clustered_data - +from sklearn.utils._testing import assert_allclose, assert_array_equal rng = np.random.RandomState(0) n_points_per_cluster = 10 diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index d301f06e92075..33968a542691a 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -1,24 +1,21 @@ """Testing for Spectral Clustering methods""" +import pickle import re import numpy as np +import pytest from scipy import sparse from scipy.linalg import LinAlgError -import pytest - -import pickle - -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_equal - from sklearn.cluster import SpectralClustering, spectral_clustering -from sklearn.cluster._spectral import discretize, cluster_qr +from sklearn.cluster._spectral import cluster_qr, discretize +from sklearn.datasets import make_blobs from sklearn.feature_extraction import img_to_graph from sklearn.metrics import adjusted_rand_score from sklearn.metrics.pairwise import kernel_metrics, rbf_kernel from sklearn.neighbors import NearestNeighbors -from sklearn.datasets import make_blobs +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_array_equal try: from pyamg import smoothed_aggregation_solver # noqa diff --git a/sklearn/compose/__init__.py b/sklearn/compose/__init__.py index 8be8d17040e82..7b137cdf9e07f 100644 --- a/sklearn/compose/__init__.py +++ b/sklearn/compose/__init__.py @@ -7,12 +7,11 @@ from ._column_transformer import ( ColumnTransformer, - make_column_transformer, make_column_selector, + make_column_transformer, ) from ._target import TransformedTargetRegressor - __all__ = [ "ColumnTransformer", "make_column_transformer", diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 14349662cfee9..1f5854eac663e 100644 --- 
a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -6,29 +6,28 @@ # Author: Andreas Mueller # Joris Van den Bossche # License: BSD -from numbers import Integral, Real -from itertools import chain from collections import Counter +from itertools import chain +from numbers import Integral, Real import numpy as np from scipy import sparse -from ..base import clone, TransformerMixin -from ..base import _fit_context -from ..utils._estimator_html_repr import _VisualBlock -from ..pipeline import _fit_transform_one, _transform_one, _name_estimators +from ..base import TransformerMixin, _fit_context, clone +from ..pipeline import _fit_transform_one, _name_estimators, _transform_one from ..preprocessing import FunctionTransformer -from ..utils import Bunch -from ..utils import _safe_indexing -from ..utils import _get_column_indices -from ..utils._param_validation import HasMethods, Interval, StrOptions, Hidden +from ..utils import Bunch, _get_column_indices, _safe_indexing, check_pandas_support +from ..utils._estimator_html_repr import _VisualBlock +from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions from ..utils._set_output import _get_output_config, _safe_set_output -from ..utils import check_pandas_support from ..utils.metaestimators import _BaseComposition -from ..utils.validation import check_array, check_is_fitted, _check_feature_names_in -from ..utils.validation import _num_samples -from ..utils.parallel import delayed, Parallel - +from ..utils.parallel import Parallel, delayed +from ..utils.validation import ( + _check_feature_names_in, + _num_samples, + check_array, + check_is_fitted, +) __all__ = ["ColumnTransformer", "make_column_transformer", "make_column_selector"] diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index e926ed7abe324..348cdda48ea1c 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -6,14 +6,13 @@ import numpy as np -from ..base import BaseEstimator, RegressorMixin, clone -from ..base import _fit_context -from ..utils.validation import check_is_fitted -from ..utils._tags import _safe_tags -from ..utils import check_array, _safe_indexing -from ..utils._param_validation import HasMethods -from ..preprocessing import FunctionTransformer +from ..base import BaseEstimator, RegressorMixin, _fit_context, clone from ..exceptions import NotFittedError +from ..preprocessing import FunctionTransformer +from ..utils import _safe_indexing, check_array +from ..utils._param_validation import HasMethods +from ..utils._tags import _safe_tags +from ..utils.validation import check_is_fitted __all__ = ["TransformedTargetRegressor"] diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index cb9ddc0b4f344..dcf84273b3f14 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -1,28 +1,33 @@ """ Test the ColumnTransformer. 
""" -import re import pickle +import re import numpy as np -from scipy import sparse import pytest - from numpy.testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_almost_equal +from scipy import sparse from sklearn.base import BaseEstimator, TransformerMixin from sklearn.compose import ( ColumnTransformer, - make_column_transformer, make_column_selector, + make_column_transformer, ) from sklearn.exceptions import NotFittedError -from sklearn.preprocessing import FunctionTransformer -from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder from sklearn.feature_selection import VarianceThreshold +from sklearn.preprocessing import ( + FunctionTransformer, + Normalizer, + OneHotEncoder, + StandardScaler, +) +from sklearn.utils._testing import ( + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_equal, +) class Trans(TransformerMixin, BaseEstimator): diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py index f0d63c00c2772..53242b7e0277b 100644 --- a/sklearn/compose/tests/test_target.py +++ b/sklearn/compose/tests/test_target.py @@ -1,25 +1,14 @@ import numpy as np import pytest -from sklearn.base import clone -from sklearn.base import BaseEstimator -from sklearn.base import TransformerMixin - -from sklearn.dummy import DummyRegressor - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_no_warnings - -from sklearn.preprocessing import FunctionTransformer -from sklearn.preprocessing import StandardScaler - -from sklearn.pipeline import Pipeline - -from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit - from sklearn import datasets - +from sklearn.base import BaseEstimator, TransformerMixin, clone from sklearn.compose import TransformedTargetRegressor +from sklearn.dummy import DummyRegressor +from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import FunctionTransformer, StandardScaler +from sklearn.utils._testing import assert_allclose, assert_no_warnings friedman = datasets.make_friedman1(random_state=0) diff --git a/sklearn/conftest.py b/sklearn/conftest.py index 5d5f80d2e22d5..3d2c73b99a801 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -1,29 +1,29 @@ -from os import environ -from functools import wraps import platform import sys from contextlib import suppress +from functools import wraps +from os import environ from unittest import SkipTest import joblib -import pytest import numpy as np -from threadpoolctl import threadpool_limits +import pytest from _pytest.doctest import DoctestItem +from threadpoolctl import threadpool_limits -from sklearn.utils import _IS_32BIT from sklearn._min_dependencies import PYTEST_MIN_VERSION -from sklearn.utils.fixes import sp_version -from sklearn.utils.fixes import parse_version -from sklearn.datasets import fetch_20newsgroups -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.datasets import fetch_california_housing -from sklearn.datasets import fetch_covtype -from sklearn.datasets import fetch_kddcup99 -from sklearn.datasets import fetch_olivetti_faces -from sklearn.datasets import fetch_rcv1 +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_20newsgroups_vectorized, + fetch_california_housing, + fetch_covtype, + fetch_kddcup99, + 
fetch_olivetti_faces, + fetch_rcv1, +) from sklearn.tests import random_seed - +from sklearn.utils import _IS_32BIT +from sklearn.utils.fixes import parse_version, sp_version if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): raise ImportError( diff --git a/sklearn/covariance/__init__.py b/sklearn/covariance/__init__.py index 011fde3647145..8fcf8c68444e5 100644 --- a/sklearn/covariance/__init__.py +++ b/sklearn/covariance/__init__.py @@ -6,24 +6,23 @@ Models. """ +from ._elliptic_envelope import EllipticEnvelope from ._empirical_covariance import ( - empirical_covariance, EmpiricalCovariance, + empirical_covariance, log_likelihood, ) +from ._graph_lasso import GraphicalLasso, GraphicalLassoCV, graphical_lasso +from ._robust_covariance import MinCovDet, fast_mcd from ._shrunk_covariance import ( - shrunk_covariance, + OAS, + LedoitWolf, ShrunkCovariance, ledoit_wolf, ledoit_wolf_shrinkage, - LedoitWolf, oas, - OAS, + shrunk_covariance, ) -from ._robust_covariance import fast_mcd, MinCovDet -from ._graph_lasso import graphical_lasso, GraphicalLasso, GraphicalLassoCV -from ._elliptic_envelope import EllipticEnvelope - __all__ = [ "EllipticEnvelope", diff --git a/sklearn/covariance/_elliptic_envelope.py b/sklearn/covariance/_elliptic_envelope.py index c99f200592580..fe109dddd5303 100644 --- a/sklearn/covariance/_elliptic_envelope.py +++ b/sklearn/covariance/_elliptic_envelope.py @@ -2,14 +2,15 @@ # # License: BSD 3 clause -import numpy as np from numbers import Real -from . import MinCovDet + +import numpy as np + +from ..base import OutlierMixin, _fit_context +from ..metrics import accuracy_score from ..utils._param_validation import Interval from ..utils.validation import check_is_fitted -from ..metrics import accuracy_score -from ..base import OutlierMixin -from ..base import _fit_context +from ._robust_covariance import MinCovDet class EllipticEnvelope(OutlierMixin, MinCovDet): diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py index 8083bfd2e1aa1..e39c18017cdf0 100644 --- a/sklearn/covariance/_empirical_covariance.py +++ b/sklearn/covariance/_empirical_covariance.py @@ -11,16 +11,16 @@ # avoid division truncation import warnings + import numpy as np from scipy import linalg from .. import config_context -from ..base import BaseEstimator -from ..base import _fit_context +from ..base import BaseEstimator, _fit_context +from ..metrics.pairwise import pairwise_distances from ..utils import check_array from ..utils._param_validation import validate_params from ..utils.extmath import fast_logdet -from ..metrics.pairwise import pairwise_distances def log_likelihood(emp_cov, precision): diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py index 8575cc4f75801..2b3248eb0300e 100644 --- a/sklearn/covariance/_graph_lasso.py +++ b/sklearn/covariance/_graph_lasso.py @@ -5,32 +5,30 @@ # Author: Gael Varoquaux # License: BSD 3 clause # Copyright: INRIA -import warnings import operator import sys import time - +import warnings from numbers import Integral, Real + import numpy as np from scipy import linalg -from . 
import empirical_covariance, EmpiricalCovariance, log_likelihood - from ..base import _fit_context from ..exceptions import ConvergenceWarning -from ..utils.validation import ( - _is_arraylike_not_scalar, - check_random_state, - check_scalar, -) -from ..utils.parallel import delayed, Parallel -from ..utils._param_validation import Interval, StrOptions -from ..utils._param_validation import validate_params # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast' from ..linear_model import _cd_fast as cd_fast # type: ignore from ..linear_model import lars_path_gram from ..model_selection import check_cv, cross_val_score +from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.parallel import Parallel, delayed +from ..utils.validation import ( + _is_arraylike_not_scalar, + check_random_state, + check_scalar, +) +from . import EmpiricalCovariance, empirical_covariance, log_likelihood # Helper functions to compute the objective and dual objective functions diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py index c723bba7a097b..a6b32e50a6c1f 100644 --- a/sklearn/covariance/_robust_covariance.py +++ b/sklearn/covariance/_robust_covariance.py @@ -10,15 +10,16 @@ import warnings from numbers import Integral, Real + import numpy as np from scipy import linalg from scipy.stats import chi2 -from . import empirical_covariance, EmpiricalCovariance from ..base import _fit_context -from ..utils.extmath import fast_logdet -from ..utils import check_random_state, check_array +from ..utils import check_array, check_random_state from ..utils._param_validation import Interval +from ..utils.extmath import fast_logdet +from ._empirical_covariance import EmpiricalCovariance, empirical_covariance # Minimum Covariance Determinant diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index 21d2e034b45d7..06d65c46faef7 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -14,13 +14,14 @@ # avoid division truncation import warnings -from numbers import Real, Integral +from numbers import Integral, Real + import numpy as np -from . import empirical_covariance, EmpiricalCovariance from ..base import _fit_context from ..utils import check_array from ..utils._param_validation import Interval, validate_params +from . 
+from . import EmpiricalCovariance, empirical_covariance


 def _ledoit_wolf(X, *, assume_centered, block_size):
diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py
index bbd3a4757a835..0866c209a10c3 100644
--- a/sklearn/covariance/tests/test_covariance.py
+++ b/sklearn/covariance/tests/test_covariance.py
@@ -7,24 +7,25 @@
 import numpy as np
 import pytest

-from sklearn.utils._testing import assert_allclose
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_array_equal
-
 from sklearn import datasets
 from sklearn.covariance import (
-    empirical_covariance,
+    OAS,
     EmpiricalCovariance,
-    ShrunkCovariance,
-    shrunk_covariance,
     LedoitWolf,
+    ShrunkCovariance,
+    empirical_covariance,
     ledoit_wolf,
     ledoit_wolf_shrinkage,
-    OAS,
     oas,
+    shrunk_covariance,
 )
 from sklearn.covariance._shrunk_covariance import _ledoit_wolf
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_almost_equal,
+    assert_array_almost_equal,
+    assert_array_equal,
+)

 from .._shrunk_covariance import _oas
diff --git a/sklearn/covariance/tests/test_elliptic_envelope.py b/sklearn/covariance/tests/test_elliptic_envelope.py
index 122d4c8bfb4cc..ca85717fb3782 100644
--- a/sklearn/covariance/tests/test_elliptic_envelope.py
+++ b/sklearn/covariance/tests/test_elliptic_envelope.py
@@ -6,10 +6,12 @@
 import pytest

 from sklearn.covariance import EllipticEnvelope
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_array_equal
 from sklearn.exceptions import NotFittedError
+from sklearn.utils._testing import (
+    assert_almost_equal,
+    assert_array_almost_equal,
+    assert_array_equal,
+)


 def test_elliptic_envelope(global_random_seed):
diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py
index 44a60f3e05103..317bf2aa85124 100644
--- a/sklearn/covariance/tests/test_graphical_lasso.py
+++ b/sklearn/covariance/tests/test_graphical_lasso.py
@@ -1,26 +1,27 @@
 """ Test the graphical_lasso module.
""" import sys -import pytest +from io import StringIO import numpy as np -from scipy import linalg - +import pytest from numpy.testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils._testing import _convert_container +from scipy import linalg +from sklearn import datasets from sklearn.covariance import ( - graphical_lasso, GraphicalLasso, GraphicalLassoCV, empirical_covariance, + graphical_lasso, ) from sklearn.datasets import make_sparse_spd_matrix -from io import StringIO from sklearn.utils import check_random_state -from sklearn import datasets +from sklearn.utils._testing import ( + _convert_container, + assert_array_almost_equal, + assert_array_less, +) def test_graphical_lasso(random_state=0): diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index 213f3d7e8f04b..44dcdbbbf8249 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -9,11 +9,9 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_array_almost_equal - from sklearn import datasets -from sklearn.covariance import empirical_covariance, MinCovDet -from sklearn.covariance import fast_mcd +from sklearn.covariance import MinCovDet, empirical_covariance, fast_mcd +from sklearn.utils._testing import assert_array_almost_equal X = datasets.load_iris().data X_1d = X[:, 0] diff --git a/sklearn/cross_decomposition/__init__.py b/sklearn/cross_decomposition/__init__.py index ec2f5fb3049af..47b78783caf9c 100644 --- a/sklearn/cross_decomposition/__init__.py +++ b/sklearn/cross_decomposition/__init__.py @@ -1,3 +1,3 @@ -from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA +from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"] diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index da395d8f060fb..f1fc90af11d82 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -5,25 +5,27 @@ # Author: Edouard Duchesnay # License: BSD 3 clause -from numbers import Integral, Real - import warnings from abc import ABCMeta, abstractmethod +from numbers import Integral, Real import numpy as np from scipy.linalg import svd -from ..base import BaseEstimator, RegressorMixin, TransformerMixin -from ..base import MultiOutputMixin -from ..base import ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + MultiOutputMixin, + RegressorMixin, + TransformerMixin, + _fit_context, +) +from ..exceptions import ConvergenceWarning from ..utils import check_array, check_consistent_length -from ..utils.fixes import sp_version -from ..utils.fixes import parse_version -from ..utils.extmath import svd_flip -from ..utils.validation import check_is_fitted, FLOAT_DTYPES from ..utils._param_validation import Interval, StrOptions -from ..exceptions import ConvergenceWarning +from ..utils.extmath import svd_flip +from ..utils.fixes import parse_version, sp_version +from ..utils.validation import FLOAT_DTYPES, check_is_fitted __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"] diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 8f4840c9b9f21..fcdd927efb389 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ 
+++ b/sklearn/cross_decomposition/tests/test_pls.py
@@ -1,21 +1,20 @@
-import pytest
 import warnings
+
 import numpy as np
-from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose
+import pytest
+from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal

-from sklearn.datasets import load_linnerud
+from sklearn.cross_decomposition import CCA, PLSSVD, PLSCanonical, PLSRegression
 from sklearn.cross_decomposition._pls import (
     _center_scale_xy,
     _get_first_singular_vectors_power_method,
     _get_first_singular_vectors_svd,
     _svd_flip_1d,
 )
-from sklearn.cross_decomposition import CCA
-from sklearn.cross_decomposition import PLSSVD, PLSRegression, PLSCanonical
-from sklearn.datasets import make_regression
+from sklearn.datasets import load_linnerud, make_regression
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils import check_random_state
 from sklearn.utils.extmath import svd_flip
-from sklearn.exceptions import ConvergenceWarning


 def assert_matrix_orthogonal(M):
diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py
index 465d4159a32c4..7ae7902f3365c 100644
--- a/sklearn/datasets/__init__.py
+++ b/sklearn/datasets/__init__.py
@@ -5,52 +5,55 @@
 """
 import textwrap

-from ._base import load_breast_cancer
-from ._base import load_diabetes
-from ._base import load_digits
-from ._base import load_files
-from ._base import load_iris
-from ._base import load_linnerud
-from ._base import load_sample_images
-from ._base import load_sample_image
-from ._base import load_wine
-from ._base import get_data_home
-from ._base import clear_data_home
+from ._base import (
+    clear_data_home,
+    get_data_home,
+    load_breast_cancer,
+    load_diabetes,
+    load_digits,
+    load_files,
+    load_iris,
+    load_linnerud,
+    load_sample_image,
+    load_sample_images,
+    load_wine,
+)
+from ._california_housing import fetch_california_housing
 from ._covtype import fetch_covtype
 from ._kddcup99 import fetch_kddcup99
-from ._lfw import fetch_lfw_pairs
-from ._lfw import fetch_lfw_people
-from ._twenty_newsgroups import fetch_20newsgroups
-from ._twenty_newsgroups import fetch_20newsgroups_vectorized
-from ._openml import fetch_openml
-from ._samples_generator import make_classification
-from ._samples_generator import make_multilabel_classification
-from ._samples_generator import make_hastie_10_2
-from ._samples_generator import make_regression
-from ._samples_generator import make_blobs
-from ._samples_generator import make_moons
-from ._samples_generator import make_circles
-from ._samples_generator import make_friedman1
-from ._samples_generator import make_friedman2
-from ._samples_generator import make_friedman3
-from ._samples_generator import make_low_rank_matrix
-from ._samples_generator import make_sparse_coded_signal
-from ._samples_generator import make_sparse_uncorrelated
-from ._samples_generator import make_spd_matrix
-from ._samples_generator import make_swiss_roll
-from ._samples_generator import make_s_curve
-from ._samples_generator import make_sparse_spd_matrix
-from ._samples_generator import make_gaussian_quantiles
-from ._samples_generator import make_biclusters
-from ._samples_generator import make_checkerboard
-from ._svmlight_format_io import load_svmlight_file
-from ._svmlight_format_io import load_svmlight_files
-from ._svmlight_format_io import dump_svmlight_file
+from ._lfw import fetch_lfw_pairs, fetch_lfw_people
 from ._olivetti_faces import fetch_olivetti_faces
-from ._species_distributions import fetch_species_distributions
-from ._california_housing import fetch_california_housing
+from ._openml import fetch_openml
 from ._rcv1 import fetch_rcv1
-
+from ._samples_generator import (
+    make_biclusters,
+    make_blobs,
+    make_checkerboard,
+    make_circles,
+    make_classification,
+    make_friedman1,
+    make_friedman2,
+    make_friedman3,
+    make_gaussian_quantiles,
+    make_hastie_10_2,
+    make_low_rank_matrix,
+    make_moons,
+    make_multilabel_classification,
+    make_regression,
+    make_s_curve,
+    make_sparse_coded_signal,
+    make_sparse_spd_matrix,
+    make_sparse_uncorrelated,
+    make_spd_matrix,
+    make_swiss_roll,
+)
+from ._species_distributions import fetch_species_distributions
+from ._svmlight_format_io import (
+    dump_svmlight_file,
+    load_svmlight_file,
+    load_svmlight_files,
+)
+from ._twenty_newsgroups import fetch_20newsgroups, fetch_20newsgroups_vectorized

 __all__ = [
     "clear_data_home",
diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py
index bba06fbb74021..d9cc42de71f66 100644
--- a/sklearn/datasets/_arff_parser.py
+++ b/sklearn/datasets/_arff_parser.py
@@ -8,7 +8,6 @@

 import numpy as np
 import scipy as sp
-
 from ..externals import _arff
 from ..externals._arff import ArffSparseDataType
 from ..utils import (
diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index 014a37ae30b09..7dad2f1eb7cd1 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -7,26 +7,23 @@
 # 2010 Olivier Grisel
 # License: BSD 3 clause
 import csv
-import hashlib
 import gzip
+import hashlib
+import os
 import shutil
 from collections import namedtuple
-import os
+from numbers import Integral
 from os import environ, listdir, makedirs
 from os.path import expanduser, isdir, join, splitext
 from pathlib import Path
-from numbers import Integral
-
-from ..preprocessing import scale
-from ..utils import Bunch
-from ..utils import check_random_state
-from ..utils import check_pandas_support
-from ..utils.fixes import _open_binary, _open_text, _read_text, _contents
-from ..utils._param_validation import validate_params, Interval, StrOptions
+from urllib.request import urlretrieve

 import numpy as np
-from urllib.request import urlretrieve
+
+from ..preprocessing import scale
+from ..utils import Bunch, check_pandas_support, check_random_state
+from ..utils._param_validation import Interval, StrOptions, validate_params
+from ..utils.fixes import _contents, _open_binary, _open_text, _read_text

 DATA_MODULE = "sklearn.datasets.data"
 DESCR_MODULE = "sklearn.datasets.descr"
diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py
index 96443c95f9979..0c06544e88317 100644
--- a/sklearn/datasets/_california_housing.py
+++ b/sklearn/datasets/_california_housing.py
@@ -21,24 +21,24 @@
 # Authors: Peter Prettenhofer
 # License: BSD 3 clause

-from os.path import exists
-from os import makedirs, remove
-import tarfile
-
-import numpy as np
 import logging
+import tarfile
+from os import makedirs, remove
+from os.path import exists

 import joblib
+import numpy as np

-from . import get_data_home
-from ._base import _convert_data_dataframe
-from ._base import _fetch_remote
-from ._base import _pkl_filepath
-from ._base import RemoteFileMetadata
-from ._base import load_descr
 from ..utils import Bunch
 from ..utils._param_validation import validate_params
-
+from . import get_data_home
+from ._base import (
+    RemoteFileMetadata,
+    _convert_data_dataframe,
+    _fetch_remote,
+    _pkl_filepath,
+    load_descr,
+)

 # The original data can be found at:
 # https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz
diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 83bd8ad229924..236e69727b7ef 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -14,24 +14,25 @@
 # Peter Prettenhofer
 # License: BSD 3 clause

-from gzip import GzipFile
 import logging
-from os.path import exists, join
 import os
+from gzip import GzipFile
+from os.path import exists, join
 from tempfile import TemporaryDirectory

-import numpy as np
 import joblib
+import numpy as np

-from . import get_data_home
-from ._base import _convert_data_dataframe
-from ._base import _fetch_remote
-from ._base import RemoteFileMetadata
-from ._base import load_descr
-from ..utils import Bunch
-from ._base import _pkl_filepath
-from ..utils import check_random_state
+from ..utils import Bunch, check_random_state
 from ..utils._param_validation import validate_params
+from . import get_data_home
+from ._base import (
+    RemoteFileMetadata,
+    _convert_data_dataframe,
+    _fetch_remote,
+    _pkl_filepath,
+    load_descr,
+)

 # The original data can be found in:
 # https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz
diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py
index 749e15cd53522..30a535c1f4cd4 100644
--- a/sklearn/datasets/_kddcup99.py
+++ b/sklearn/datasets/_kddcup99.py
@@ -9,24 +9,24 @@
 """

 import errno
-from gzip import GzipFile
 import logging
 import os
+from gzip import GzipFile
 from os.path import exists, join

-import numpy as np
 import joblib
+import numpy as np

-from ._base import _fetch_remote
-from ._base import _convert_data_dataframe
-from . import get_data_home
-from ._base import RemoteFileMetadata
-from ._base import load_descr
-from ..utils._param_validation import StrOptions, validate_params
-from ..utils import Bunch
-from ..utils import check_random_state
+from ..utils import Bunch, check_random_state
 from ..utils import shuffle as shuffle_method
-
+from ..utils._param_validation import StrOptions, validate_params
+from . import get_data_home
+from ._base import (
+    RemoteFileMetadata,
+    _convert_data_dataframe,
+    _fetch_remote,
+    load_descr,
+)

 # The original data can be found at:
 # https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz
diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py
index 7f6cf8f235d3f..e758eef5dc427 100644
--- a/sklearn/datasets/_lfw.py
+++ b/sklearn/datasets/_lfw.py
@@ -8,22 +8,22 @@
 # Copyright (c) 2011 Olivier Grisel
 # License: BSD 3 clause

-from os import listdir, makedirs, remove
-from os.path import join, exists, isdir
-from ..utils._param_validation import validate_params, Interval, Hidden, StrOptions
-from numbers import Integral, Real
 import logging
+from numbers import Integral, Real
+from os import listdir, makedirs, remove
+from os.path import exists, isdir, join

 import numpy as np
 from joblib import Memory

+from ..utils import Bunch
+from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
 from ._base import (
-    get_data_home,
-    _fetch_remote,
     RemoteFileMetadata,
+    _fetch_remote,
+    get_data_home,
     load_descr,
 )
-from ..utils import Bunch

 logger = logging.getLogger(__name__)
diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py
index 55f4b856c6cf0..5ef5cb6286c9f 100644
--- a/sklearn/datasets/_olivetti_faces.py
+++ b/sklearn/datasets/_olivetti_faces.py
@@ -13,20 +13,17 @@
 # Copyright (c) 2011 David Warde-Farley
 # License: BSD 3 clause

-from os.path import exists
 from os import makedirs, remove
+from os.path import exists

+import joblib
 import numpy as np
 from scipy.io import loadmat
-import joblib

-from . import get_data_home
-from ._base import _fetch_remote
-from ._base import RemoteFileMetadata
-from ._base import _pkl_filepath
-from ._base import load_descr
-from ..utils import check_random_state, Bunch
+from ..utils import Bunch, check_random_state
 from ..utils._param_validation import validate_params
+from . import get_data_home
+from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr

 # The original data can be found at:
 # https://cs.nyu.edu/~roweis/data/olivettifaces.mat
diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py
index 21d8eb99858bb..3f525c3433a90 100644
--- a/sklearn/datasets/_openml.py
+++ b/sklearn/datasets/_openml.py
@@ -7,18 +7,20 @@
 from contextlib import closing
 from functools import wraps
 from os.path import join
-from typing import Callable, Optional, Dict, Tuple, List, Any, Union
 from tempfile import TemporaryDirectory
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from urllib.error import HTTPError, URLError
-from urllib.request import urlopen, Request
+from urllib.request import Request, urlopen
 from warnings import warn

 import numpy as np

+from ..utils import (
+    Bunch,
+    check_pandas_support,  # noqa
+)
 from . import get_data_home
 from ._arff_parser import load_arff_from_gzip_file
-from ..utils import Bunch
-from ..utils import check_pandas_support  # noqa

 __all__ = ["fetch_openml"]
diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py
index ae391edbad113..a0780edaba9da 100644
--- a/sklearn/datasets/_rcv1.py
+++ b/sklearn/datasets/_rcv1.py
@@ -9,25 +9,20 @@
 # License: BSD 3 clause

 import logging
-
-from os import remove, makedirs
-from os.path import exists, join
 from gzip import GzipFile
+from os import makedirs, remove
+from os.path import exists, join

+import joblib
 import numpy as np
 import scipy.sparse as sp
-import joblib

+from ..utils import Bunch
+from ..utils import shuffle as shuffle_
+from ..utils._param_validation import StrOptions, validate_params
 from . import get_data_home
-from ._base import _pkl_filepath
-from ._base import _fetch_remote
-from ._base import RemoteFileMetadata
-from ._base import load_descr
+from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr
 from ._svmlight_format_io import load_svmlight_files
-from ..utils import shuffle as shuffle_
-from ..utils import Bunch
-from ..utils._param_validation import validate_params, StrOptions
-

 # The original vectorized data can be found at:
 # http://www.ai.mit.edu/projects/jmlr/papers/volume5/lewis04a/a13-vector-files/lyrl2004_vectors_test_pt0.dat.gz
diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py
index cb3b36d944eb2..9a34c995c0546 100644
--- a/sklearn/datasets/_samples_generator.py
+++ b/sklearn/datasets/_samples_generator.py
@@ -6,20 +6,20 @@
 # G. Louppe, J. Nothman
 # License: BSD 3 clause

-from numbers import Integral, Real
-import numbers
 import array
+import numbers
 import warnings
 from collections.abc import Iterable
+from numbers import Integral, Real

 import numpy as np
-from scipy import linalg
 import scipy.sparse as sp
+from scipy import linalg

 from ..preprocessing import MultiLabelBinarizer
 from ..utils import check_array, check_random_state
-from ..utils._param_validation import Interval, validate_params, Hidden, StrOptions
 from ..utils import shuffle as util_shuffle
+from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
 from ..utils.random import sample_without_replacement
diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py
index 3387217349e20..8f5a0881bdf6b 100644
--- a/sklearn/datasets/_species_distributions.py
+++ b/sklearn/datasets/_species_distributions.py
@@ -37,21 +37,18 @@
 #
 # License: BSD 3 clause

+import logging
 from io import BytesIO
 from os import makedirs, remove
 from os.path import exists
-import logging
-
-import numpy as np

 import joblib
+import numpy as np

-from . import get_data_home
-from ._base import _fetch_remote
-from ._base import RemoteFileMetadata
 from ..utils import Bunch
-from ._base import _pkl_filepath
 from ..utils._param_validation import validate_params
+from . import get_data_home
+from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath

 # The original data can be found at:
 # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip
diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py
index e04d90e15dceb..a48eab7938336 100644
--- a/sklearn/datasets/_svmlight_format_io.py
+++ b/sklearn/datasets/_svmlight_format_io.py
@@ -15,22 +15,21 @@
 # Olivier Grisel
 # License: BSD 3 clause

-from contextlib import closing
 import os.path
+from contextlib import closing
+from numbers import Integral

 import numpy as np
 import scipy.sparse as sp
-from numbers import Integral

 from .. import __version__
-
-from ..utils import check_array, IS_PYPY
-from ..utils._param_validation import validate_params, HasMethods, Interval, StrOptions
+from ..utils import IS_PYPY, check_array
+from ..utils._param_validation import HasMethods, Interval, StrOptions, validate_params

 if not IS_PYPY:
     from ._svmlight_format_fast import (
-        _load_svmlight_file,
         _dump_svmlight_file,
+        _load_svmlight_file,
     )
 else:
diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py
index 512b7974a497d..2e2dd6aa73234 100644
--- a/sklearn/datasets/_twenty_newsgroups.py
+++ b/sklearn/datasets/_twenty_newsgroups.py
@@ -24,29 +24,30 @@
 # Copyright (c) 2011 Olivier Grisel
 # License: BSD 3 clause

-import os
+import codecs
 import logging
-import tarfile
+import os
 import pickle
-import shutil
 import re
-import codecs
+import shutil
+import tarfile

+import joblib
 import numpy as np
 import scipy.sparse as sp
-import joblib

-from . import get_data_home
-from . import load_files
-from ._base import _convert_data_dataframe
-from ._base import _pkl_filepath
-from ._base import _fetch_remote
-from ._base import RemoteFileMetadata
-from ._base import load_descr
-from ..feature_extraction.text import CountVectorizer
 from .. import preprocessing
-from ..utils import check_random_state, Bunch
+from ..feature_extraction.text import CountVectorizer
+from ..utils import Bunch, check_random_state
 from ..utils._param_validation import StrOptions, validate_params
+from . import get_data_home, load_files
+from ._base import (
+    RemoteFileMetadata,
+    _convert_data_dataframe,
+    _fetch_remote,
+    _pkl_filepath,
+    load_descr,
+)

 logger = logging.getLogger(__name__)
diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py
index ef1280f6218b1..c8ab1cd04ee6e 100644
--- a/sklearn/datasets/tests/conftest.py
+++ b/sklearn/datasets/tests/conftest.py
@@ -1,6 +1,7 @@
 """ Network tests are only run, if data is already locally available, or if
 download is specifically requested by environment variable."""
 import builtins
+
 import pytest
diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py
index e30348c894559..af308e49c5ebf 100644
--- a/sklearn/datasets/tests/test_20news.py
+++ b/sklearn/datasets/tests/test_20news.py
@@ -4,16 +4,17 @@
 from functools import partial
 from unittest.mock import patch

-import pytest
-
 import numpy as np
+import pytest
 import scipy.sparse as sp

-from sklearn.datasets.tests.test_common import check_as_frame
-from sklearn.datasets.tests.test_common import check_pandas_dependency_message
-from sklearn.datasets.tests.test_common import check_return_X_y
-from sklearn.utils._testing import assert_allclose_dense_sparse
+from sklearn.datasets.tests.test_common import (
+    check_as_frame,
+    check_pandas_dependency_message,
+    check_return_X_y,
+)
 from sklearn.preprocessing import normalize
+from sklearn.utils._testing import assert_allclose_dense_sparse


 def test_20news(fetch_20newsgroups_fxt):
diff --git a/sklearn/datasets/tests/test_arff_parser.py b/sklearn/datasets/tests/test_arff_parser.py
index 8465289d187ee..b675439cd2e9d 100644
--- a/sklearn/datasets/tests/test_arff_parser.py
+++ b/sklearn/datasets/tests/test_arff_parser.py
@@ -1,5 +1,5 @@
-from io import BytesIO
 import textwrap
+from io import BytesIO

 import pytest
diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
index 23dc78570fc9d..f31f20636c0c1 100644
--- a/sklearn/datasets/tests/test_base.py
+++ b/sklearn/datasets/tests/test_base.py
@@ -2,31 +2,33 @@
 import shutil
 import tempfile
 import warnings
-from pickle import loads
-from pickle import dumps
 from functools import partial
+from pickle import dumps, loads

-import pytest
 import numpy as np
-from sklearn.datasets import get_data_home
-from sklearn.datasets import clear_data_home
-from sklearn.datasets import load_files
-from sklearn.datasets import load_sample_images
-from sklearn.datasets import load_sample_image
-from sklearn.datasets import load_digits
-from sklearn.datasets import load_diabetes
-from sklearn.datasets import load_linnerud
-from sklearn.datasets import load_iris
-from sklearn.datasets import load_breast_cancer
-from sklearn.datasets import load_wine
+import pytest
+
+from sklearn.datasets import (
+    clear_data_home,
+    get_data_home,
+    load_breast_cancer,
+    load_diabetes,
+    load_digits,
+    load_files,
+    load_iris,
+    load_linnerud,
+    load_sample_image,
+    load_sample_images,
+    load_wine,
+)
 from sklearn.datasets._base import (
     load_csv_data,
     load_gzip_compressed_csv_data,
 )
+from sklearn.datasets.tests.test_common import check_as_frame
 from sklearn.preprocessing import scale
 from sklearn.utils import Bunch
 from sklearn.utils.fixes import _is_resource
-from sklearn.datasets.tests.test_common import check_as_frame


 def _remove_dir(path):
diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py
index 495becccd820f..ef6fc95db80bf 100644
--- a/sklearn/datasets/tests/test_california_housing.py
+++ b/sklearn/datasets/tests/test_california_housing.py
@@ -1,10 +1,11 @@
 """Test the california_housing loader, if the data is available,
 or if specifically requested via environment variable (e.g. for CI jobs)."""
+from functools import partial
+
 import pytest

 from sklearn.datasets.tests.test_common import check_return_X_y
-from functools import partial


 def test_fetch(fetch_california_housing_fxt):
diff --git a/sklearn/datasets/tests/test_common.py b/sklearn/datasets/tests/test_common.py
index 5f21bdc66b4dc..8048a31041ddc 100644
--- a/sklearn/datasets/tests/test_common.py
+++ b/sklearn/datasets/tests/test_common.py
@@ -2,8 +2,8 @@
 import inspect
 import os

-import pytest
 import numpy as np
+import pytest

 import sklearn.datasets
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index 2cc2fed81bad6..e44fdaae69ec3 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -2,7 +2,9 @@
 or if specifically requested via environment variable (e.g. for CI jobs)."""

 from functools import partial
+
 import pytest
+
 from sklearn.datasets.tests.test_common import check_return_X_y
diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py
index 8eb1d6ec71eb3..5f6e9c83a30b8 100644
--- a/sklearn/datasets/tests/test_kddcup99.py
+++ b/sklearn/datasets/tests/test_kddcup99.py
@@ -7,11 +7,14 @@
 """

 from functools import partial
+
 import pytest

-from sklearn.datasets.tests.test_common import check_as_frame
-from sklearn.datasets.tests.test_common import check_pandas_dependency_message
-from sklearn.datasets.tests.test_common import check_return_X_y
+from sklearn.datasets.tests.test_common import (
+    check_as_frame,
+    check_pandas_dependency_message,
+    check_return_X_y,
+)


 @pytest.mark.parametrize("as_frame", [True, False])
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index 36f33d8a10289..92edb99ce3b0b 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -8,19 +8,18 @@
 joblib, successive runs will be fast (less than 200ms).
""" -import random import os +import random import shutil import tempfile +from functools import partial + import numpy as np import pytest -from functools import partial -from sklearn.datasets import fetch_lfw_pairs -from sklearn.datasets import fetch_lfw_people -from sklearn.utils._testing import assert_array_equal +from sklearn.datasets import fetch_lfw_pairs, fetch_lfw_people from sklearn.datasets.tests.test_common import check_return_X_y - +from sklearn.utils._testing import assert_array_equal SCIKIT_LEARN_DATA = None SCIKIT_LEARN_EMPTY_DATA = None diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py index 18fceb0ed8b0e..e5d6c853aa454 100644 --- a/sklearn/datasets/tests/test_olivetti_faces.py +++ b/sklearn/datasets/tests/test_olivetti_faces.py @@ -4,9 +4,8 @@ import numpy as np -from sklearn.utils import Bunch from sklearn.datasets.tests.test_common import check_return_X_y - +from sklearn.utils import Bunch from sklearn.utils._testing import assert_array_equal diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index c13b82dd769d3..8c78b753f336f 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -8,28 +8,26 @@ from urllib.error import HTTPError import numpy as np -import scipy.sparse import pytest +import scipy.sparse import sklearn from sklearn import config_context +from sklearn.datasets import fetch_openml as fetch_openml_orig +from sklearn.datasets._openml import ( + _OPENML_PREFIX, + _get_local_path, + _open_openml_url, + _retry_with_clean_cache, +) from sklearn.utils import Bunch, check_pandas_support -from sklearn.utils.fixes import _open_binary from sklearn.utils._testing import ( SkipTest, assert_allclose, assert_array_equal, fails_if_pypy, ) - -from sklearn.datasets import fetch_openml as fetch_openml_orig -from sklearn.datasets._openml import ( - _OPENML_PREFIX, - _open_openml_url, - _get_local_path, - _retry_with_clean_cache, -) - +from sklearn.utils.fixes import _open_binary OPENML_TEST_DATA_MODULE = "sklearn.datasets.tests.data.openml" # if True, urlopen will be monkey patched to only use local files diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index 11d0335f4fb8c..fbb9d67015a30 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -2,12 +2,13 @@ or if specifically requested via environment variable (e.g. 
 or if specifically requested via environment variable (e.g. for CI jobs)."""

-import scipy.sparse as sp
-import numpy as np
 from functools import partial
+
+import numpy as np
+import scipy.sparse as sp
+
 from sklearn.datasets.tests.test_common import check_return_X_y
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_almost_equal, assert_array_equal


 def test_fetch_rcv1(fetch_rcv1_fxt, global_random_seed):
diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py
index cd23fc5016672..ad6569f0863bf 100644
--- a/sklearn/datasets/tests/test_samples_generator.py
+++ b/sklearn/datasets/tests/test_samples_generator.py
@@ -6,31 +6,33 @@
 import pytest
 import scipy.sparse as sp

-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_allclose
-from sklearn.utils._testing import ignore_warnings
-
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_multilabel_classification
-from sklearn.datasets import make_hastie_10_2
-from sklearn.datasets import make_regression
-from sklearn.datasets import make_blobs
-from sklearn.datasets import make_friedman1
-from sklearn.datasets import make_friedman2
-from sklearn.datasets import make_friedman3
-from sklearn.datasets import make_low_rank_matrix
-from sklearn.datasets import make_moons
-from sklearn.datasets import make_circles
-from sklearn.datasets import make_sparse_coded_signal
-from sklearn.datasets import make_sparse_uncorrelated
-from sklearn.datasets import make_spd_matrix
-from sklearn.datasets import make_swiss_roll
-from sklearn.datasets import make_s_curve
-from sklearn.datasets import make_biclusters
-from sklearn.datasets import make_checkerboard
-
+from sklearn.datasets import (
+    make_biclusters,
+    make_blobs,
+    make_checkerboard,
+    make_circles,
+    make_classification,
+    make_friedman1,
+    make_friedman2,
+    make_friedman3,
+    make_hastie_10_2,
+    make_low_rank_matrix,
+    make_moons,
+    make_multilabel_classification,
+    make_regression,
+    make_s_curve,
+    make_sparse_coded_signal,
+    make_sparse_uncorrelated,
+    make_spd_matrix,
+    make_swiss_roll,
+)
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_almost_equal,
+    assert_array_almost_equal,
+    assert_array_equal,
+    ignore_warnings,
+)
 from sklearn.utils.validation import assert_all_finite
diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py
index 0b76cce3c5a4d..213e9095a73da 100644
--- a/sklearn/datasets/tests/test_svmlight_format.py
+++ b/sklearn/datasets/tests/test_svmlight_format.py
@@ -1,22 +1,23 @@
-from bz2 import BZ2File
 import gzip
-from io import BytesIO
-import numpy as np
-import scipy.sparse as sp
 import os
 import shutil
+from bz2 import BZ2File
+from io import BytesIO
 from tempfile import NamedTemporaryFile

+import numpy as np
 import pytest
-
-from sklearn.utils.fixes import _open_binary, _path
-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_array_almost_equal, assert_allclose
-from sklearn.utils._testing import fails_if_pypy
+import scipy.sparse as sp

 import sklearn
-from sklearn.datasets import load_svmlight_file, load_svmlight_files, dump_svmlight_file
-
+from sklearn.datasets import dump_svmlight_file, load_svmlight_file, load_svmlight_files
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_array_almost_equal,
+    assert_array_equal,
+    fails_if_pypy,
+)
+from sklearn.utils.fixes import _open_binary, _path

 TEST_DATA_MODULE = "sklearn.datasets.tests.data"
 datafile = "svmlight_classification.txt"
diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py
index c5f323d3c5d72..1f9cfe07dc0e8 100644
--- a/sklearn/decomposition/__init__.py
+++ b/sklearn/decomposition/__init__.py
@@ -5,29 +5,28 @@
 """

-from ._nmf import (
-    NMF,
-    MiniBatchNMF,
-    non_negative_factorization,
-)
-from ._pca import PCA
-from ._incremental_pca import IncrementalPCA
-from ._kernel_pca import KernelPCA
-from ._sparse_pca import SparsePCA, MiniBatchSparsePCA
-from ._truncated_svd import TruncatedSVD
-from ._fastica import FastICA, fastica
+from ..utils.extmath import randomized_svd
 from ._dict_learning import (
-    dict_learning,
-    dict_learning_online,
-    sparse_encode,
     DictionaryLearning,
     MiniBatchDictionaryLearning,
     SparseCoder,
+    dict_learning,
+    dict_learning_online,
+    sparse_encode,
 )
 from ._factor_analysis import FactorAnalysis
-from ..utils.extmath import randomized_svd
+from ._fastica import FastICA, fastica
+from ._incremental_pca import IncrementalPCA
+from ._kernel_pca import KernelPCA
 from ._lda import LatentDirichletAllocation
-
+from ._nmf import (
+    NMF,
+    MiniBatchNMF,
+    non_negative_factorization,
+)
+from ._pca import PCA
+from ._sparse_pca import MiniBatchSparsePCA, SparsePCA
+from ._truncated_svd import TruncatedSVD

 __all__ = [
     "DictionaryLearning",
diff --git a/sklearn/decomposition/_base.py b/sklearn/decomposition/_base.py
index 20bf7af4f284a..9634395a335ba 100644
--- a/sklearn/decomposition/_base.py
+++ b/sklearn/decomposition/_base.py
@@ -8,12 +8,13 @@
 #
 # License: BSD 3 clause

+from abc import ABCMeta, abstractmethod
+
 import numpy as np
 from scipy import linalg

-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
+from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin
 from ..utils.validation import check_is_fitted
-from abc import ABCMeta, abstractmethod


 class _BasePCA(
diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
index 54b3590f5b62e..b6972235dca7d 100644
--- a/sklearn/decomposition/_dict_learning.py
+++ b/sklearn/decomposition/_dict_learning.py
@@ -3,27 +3,29 @@
 # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort
 # License: BSD 3 clause

-import time
-import sys
 import itertools
-from numbers import Integral, Real
+import sys
+import time
 import warnings
-
 from math import ceil
+from numbers import Integral, Real

 import numpy as np
-from scipy import linalg
 from joblib import effective_n_jobs
+from scipy import linalg

-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
-from ..utils import check_array, check_random_state, gen_even_slices, gen_batches
-from ..utils._param_validation import Hidden, Interval, StrOptions
-from ..utils._param_validation import validate_params
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
+from ..linear_model import Lars, Lasso, LassoLars, orthogonal_mp_gram
+from ..utils import check_array, check_random_state, gen_batches, gen_even_slices
+from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
 from ..utils.extmath import randomized_svd, row_norms, svd_flip
+from ..utils.parallel import Parallel, delayed
 from ..utils.validation import check_is_fitted
-from ..utils.parallel import delayed, Parallel
-from ..linear_model import Lasso, orthogonal_mp_gram, LassoLars, Lars


 def _check_positive_coding(method, positive):
diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py
index 8c3d590b2c814..af3498d534483 100644
--- a/sklearn/decomposition/_factor_analysis.py
+++ b/sklearn/decomposition/_factor_analysis.py
@@ -20,19 +20,23 @@
 # License: BSD3

 import warnings
-from math import sqrt, log
+from math import log, sqrt
 from numbers import Integral, Real
+
 import numpy as np
 from scipy import linalg
-
-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
+from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
 from ..utils.validation import check_is_fitted
-from ..exceptions import ConvergenceWarning


 class FactorAnalysis(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py
index 6dcf62c0ace3b..da7f6393c2b7f 100644
--- a/sklearn/decomposition/_fastica.py
+++ b/sklearn/decomposition/_fastica.py
@@ -15,12 +15,16 @@
 import numpy as np
 from scipy import linalg

-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
 from ..exceptions import ConvergenceWarning
-from ..utils import check_array, as_float_array, check_random_state
+from ..utils import as_float_array, check_array, check_random_state
+from ..utils._param_validation import Interval, Options, StrOptions, validate_params
 from ..utils.validation import check_is_fitted
-from ..utils._param_validation import Interval, StrOptions, Options, validate_params

 __all__ = ["fastica", "FastICA"]
diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py
index 5ae5d58b06ca4..f05e2dacc66b2 100644
--- a/sklearn/decomposition/_incremental_pca.py
+++ b/sklearn/decomposition/_incremental_pca.py
@@ -5,14 +5,15 @@
 # License: BSD 3 clause

 from numbers import Integral
+
 import numpy as np
 from scipy import linalg, sparse

-from ._base import _BasePCA
 from ..base import _fit_context
 from ..utils import gen_batches
 from ..utils._param_validation import Interval
-from ..utils.extmath import svd_flip, _incremental_mean_and_var
+from ..utils.extmath import _incremental_mean_and_var, svd_flip
+from ._base import _BasePCA


 class IncrementalPCA(_BasePCA):
diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py
index 61d502a006c5e..ccf79e896f210 100644
--- a/sklearn/decomposition/_kernel_pca.py
+++ b/sklearn/decomposition/_kernel_pca.py
@@ -4,24 +4,29 @@
 # Sylvain Marie
 # License: BSD 3 clause

-import numpy as np
 from numbers import Integral, Real
+
+import numpy as np
 from scipy import linalg
-from scipy.sparse.linalg import eigsh
 from scipy.linalg import eigh
+from scipy.sparse.linalg import eigsh

+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
+from ..exceptions import NotFittedError
+from ..metrics.pairwise import pairwise_kernels
+from ..preprocessing import KernelCenterer
 from ..utils._arpack import _init_arpack_v0
-from ..utils.extmath import svd_flip, _randomized_eigsh
+from ..utils._param_validation import Interval, StrOptions
+from ..utils.extmath import _randomized_eigsh, svd_flip
 from ..utils.validation import (
-    check_is_fitted,
     _check_psd_eigenvalues,
+    check_is_fitted,
 )
-from ..utils._param_validation import Interval, StrOptions
-from ..exceptions import NotFittedError
-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
-from ..preprocessing import KernelCenterer
-from ..metrics.pairwise import pairwise_kernels


 class KernelPCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py
index ab1ea5ebb5460..9e161c178b9e3 100644
--- a/sklearn/decomposition/_lda.py
+++ b/sklearn/decomposition/_lda.py
@@ -14,22 +14,28 @@

 import numpy as np
 import scipy.sparse as sp
-from scipy.special import gammaln, logsumexp
 from joblib import effective_n_jobs
+from scipy.special import gammaln, logsumexp

-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
 from ..utils import check_random_state, gen_batches, gen_even_slices
-from ..utils.validation import check_non_negative
-from ..utils.validation import check_is_fitted
-from ..utils.parallel import delayed, Parallel
 from ..utils._param_validation import Interval, StrOptions
-
+from ..utils.parallel import Parallel, delayed
+from ..utils.validation import check_is_fitted, check_non_negative
 from ._online_lda_fast import (
-    mean_change as cy_mean_change,
     _dirichlet_expectation_1d as cy_dirichlet_expectation_1d,
+)
+from ._online_lda_fast import (
     _dirichlet_expectation_2d,
 )
+from ._online_lda_fast import (
+    mean_change as cy_mean_change,
+)

 EPS = np.finfo(float).eps
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index d561583dec205..40db8edd0b2fd 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -6,34 +6,37 @@
 # Tom Dupre la Tour
 # License: BSD 3 clause

+import itertools
+import time
+import warnings
 from abc import ABC
+from math import sqrt
 from numbers import Integral, Real
+
 import numpy as np
 import scipy.sparse as sp
-import time
-import itertools
-import warnings
-from math import sqrt
 from scipy import linalg

-from ._cdnmf_fast import _update_cdnmf_fast
 from .._config import config_context
-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
-from ..exceptions import ConvergenceWarning
-from ..utils import check_random_state, check_array, gen_batches
-from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
-from ..utils.validation import (
-    check_is_fitted,
-    check_non_negative,
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
 )
+from ..exceptions import ConvergenceWarning
+from ..utils import check_array, check_random_state, gen_batches, metadata_routing
 from ..utils._param_validation import (
     Interval,
     StrOptions,
     validate_params,
 )
-from ..utils import metadata_routing
-
+from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
+from ..utils.validation import (
+    check_is_fitted,
+    check_non_negative,
+)
+from ._cdnmf_fast import _update_cdnmf_fast

 EPSILON = np.finfo(np.float32).eps
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 1d3c0678aca89..96931324d7cae 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -15,20 +15,18 @@

 import numpy as np
 from scipy import linalg
-from scipy.special import gammaln
 from scipy.sparse import issparse
 from scipy.sparse.linalg import svds
+from scipy.special import gammaln

-from ._base import _BasePCA
 from ..base import _fit_context
 from ..utils import check_random_state
 from ..utils._arpack import _init_arpack_v0
+from ..utils._param_validation import Interval, RealNotInt, StrOptions
 from ..utils.deprecation import deprecated
-from ..utils.extmath import fast_logdet, randomized_svd, svd_flip
-from ..utils.extmath import stable_cumsum
+from ..utils.extmath import fast_logdet, randomized_svd, stable_cumsum, svd_flip
 from ..utils.validation import check_is_fitted
-from ..utils._param_validation import Interval, StrOptions
-from ..utils._param_validation import RealNotInt
+from ._base import _BasePCA


 def _assess_dimension(spectrum, rank, n_samples):
diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py
index 93e4a2164a87f..aa4dec2fb7ee9 100644
--- a/sklearn/decomposition/_sparse_pca.py
+++ b/sklearn/decomposition/_sparse_pca.py
@@ -6,14 +6,18 @@

 import numpy as np

+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
+from ..linear_model import ridge_regression
 from ..utils import check_random_state
-from ..utils.extmath import svd_flip
 from ..utils._param_validation import Hidden, Interval, StrOptions
+from ..utils.extmath import svd_flip
 from ..utils.validation import check_array, check_is_fitted
-from ..linear_model import ridge_regression
-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
-from ._dict_learning import dict_learning, MiniBatchDictionaryLearning
+from ._dict_learning import MiniBatchDictionaryLearning, dict_learning


 class _BaseSparsePCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index 67f5c73028f15..725683e8d46c6 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -7,18 +7,23 @@
 # License: 3-clause BSD.
 from numbers import Integral, Real
+
 import numpy as np
 import scipy.sparse as sp
 from scipy.sparse.linalg import svds

-from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin
-from ..base import _fit_context
+from ..base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
 from ..utils import check_array, check_random_state
 from ..utils._arpack import _init_arpack_v0
+from ..utils._param_validation import Interval, StrOptions
 from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip
 from ..utils.sparsefuncs import mean_variance_axis
 from ..utils.validation import check_is_fitted
-from ..utils._param_validation import Interval, StrOptions

 __all__ = ["TruncatedSVD"]
diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
index 6e6ddd20acb8c..0986bc6b3feed 100644
--- a/sklearn/decomposition/tests/test_dict_learning.py
+++ b/sklearn/decomposition/tests/test_dict_learning.py
@@ -1,38 +1,37 @@
-import pytest
+import itertools
 import warnings
+from functools import partial

 import numpy as np
-from functools import partial
-import itertools
+import pytest

 import sklearn
-
 from sklearn.base import clone
-
+from sklearn.decomposition import (
+    DictionaryLearning,
+    MiniBatchDictionaryLearning,
+    SparseCoder,
+    dict_learning,
+    dict_learning_online,
+    sparse_encode,
+)
+from sklearn.decomposition._dict_learning import _update_dict
 from sklearn.exceptions import ConvergenceWarning
-
 from sklearn.utils import check_array
+from sklearn.utils._testing import (
+    TempMemmap,
+    assert_allclose,
+    assert_array_almost_equal,
+    assert_array_equal,
+    ignore_warnings,
+)
+from sklearn.utils.estimator_checks import (
+    check_transformer_data_not_an_array,
+    check_transformer_general,
+    check_transformers_unfitted,
+)
 from sklearn.utils.parallel import Parallel
-from sklearn.utils._testing import assert_allclose
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import ignore_warnings
-from sklearn.utils._testing import TempMemmap
-
-from sklearn.decomposition import DictionaryLearning
-from sklearn.decomposition import MiniBatchDictionaryLearning
-from sklearn.decomposition import SparseCoder
-from sklearn.decomposition import dict_learning
-from sklearn.decomposition import dict_learning_online
-from sklearn.decomposition import sparse_encode
-from sklearn.utils.estimator_checks import check_transformer_data_not_an_array
-from sklearn.utils.estimator_checks import check_transformer_general
-from sklearn.utils.estimator_checks import check_transformers_unfitted
-
-from sklearn.decomposition._dict_learning import _update_dict
-
-
 rng_global = np.random.RandomState(0)
 n_samples, n_features = 10, 8
 X = rng_global.randn(n_samples, n_features)
@@ -397,8 +396,8 @@ def test_dict_learning_online_positivity(positive_code, positive_dict):
 def test_dict_learning_online_verbosity():
     # test verbosity for better coverage
     n_components = 5
-    from io import StringIO
     import sys
+    from io import StringIO

     old_stdout = sys.stdout
     try:
diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py
index 4284327f3eeb4..2ff14f8d71722 100644
--- a/sklearn/decomposition/tests/test_factor_analysis.py
+++ b/sklearn/decomposition/tests/test_factor_analysis.py
@@ -7,12 +7,14 @@
 import numpy as np
 import pytest

-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.exceptions import ConvergenceWarning
 from sklearn.decomposition import FactorAnalysis
-from sklearn.utils._testing import ignore_warnings
 from sklearn.decomposition._factor_analysis import _ortho_rotation
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils._testing import (
+    assert_almost_equal,
+    assert_array_almost_equal,
+    ignore_warnings,
+)

 # Ignore warnings from switching to more power iterations in randomized_svd
diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py
index 14938b3787a98..6a376b01ecb19 100644
--- a/sklearn/decomposition/tests/test_fastica.py
+++ b/sklearn/decomposition/tests/test_fastica.py
@@ -2,18 +2,17 @@
 Test the fastica algorithm.
 """
 import itertools
-import pytest
-import warnings
 import os
+import warnings

 import numpy as np
+import pytest
 from scipy import stats

-from sklearn.utils._testing import assert_allclose
-
-from sklearn.decomposition import FastICA, fastica, PCA
+from sklearn.decomposition import PCA, FastICA, fastica
 from sklearn.decomposition._fastica import _gs_decorrelation
 from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils._testing import assert_allclose


 def center_and_norm(x, axis=-1):
diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index d8402dad24c04..6ef500b42026b 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -1,17 +1,18 @@
 """Tests for Incremental PCA."""
-import numpy as np
-import pytest
 import warnings

-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_allclose_dense_sparse
+import numpy as np
+import pytest
 from numpy.testing import assert_array_equal
+from scipy import sparse

 from sklearn import datasets
 from sklearn.decomposition import PCA, IncrementalPCA
-
-from scipy import sparse
+from sklearn.utils._testing import (
+    assert_allclose_dense_sparse,
+    assert_almost_equal,
+    assert_array_almost_equal,
+)

 iris = datasets.load_iris()
diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py
index 39aa32a3e9694..3c95454749b4a 100644
--- a/sklearn/decomposition/tests/test_kernel_pca.py
+++ b/sklearn/decomposition/tests/test_kernel_pca.py
@@ -1,23 +1,22 @@
-import numpy as np
-import scipy.sparse as sp
-import pytest
 import warnings

-from sklearn.utils._testing import (
-    assert_array_almost_equal,
-    assert_array_equal,
-    assert_allclose,
-)
+import numpy as np
+import pytest
+import scipy.sparse as sp

+from sklearn.datasets import make_blobs, make_circles
 from sklearn.decomposition import PCA, KernelPCA
-from sklearn.datasets import make_circles
-from sklearn.datasets import make_blobs
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import Perceptron
+from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
-from sklearn.model_selection import GridSearchCV
-from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_array_almost_equal,
+    assert_array_equal,
+)
 from sklearn.utils.validation import _check_psd_eigenvalues
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index 2b1ed4d91be5e..2cd027f90cdd6 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -1,27 +1,26 @@
 import re
 import sys
-from io import StringIO
 import warnings
+from io import StringIO

 import numpy as np
+import pytest
 import scipy.sparse as sp
-
 from scipy import linalg
-from sklearn.decomposition import NMF, MiniBatchNMF
-from sklearn.decomposition import non_negative_factorization
-from sklearn.decomposition import _nmf as nmf  # For testing internals
 from scipy.sparse import csc_matrix

-import pytest
-
-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_allclose
-from sklearn.utils._testing import ignore_warnings
-from sklearn.utils.extmath import squared_norm
 from sklearn.base import clone
+from sklearn.decomposition import NMF, MiniBatchNMF, non_negative_factorization
+from sklearn.decomposition import _nmf as nmf  # For testing internals
 from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_almost_equal,
+    assert_array_almost_equal,
+    assert_array_equal,
+    ignore_warnings,
+)
+from sklearn.utils.extmath import squared_norm


 @pytest.mark.parametrize(
diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py
index 872bd55916fcb..50c812bcb9f14 100644
--- a/sklearn/decomposition/tests/test_online_lda.py
+++ b/sklearn/decomposition/tests/test_online_lda.py
@@ -1,26 +1,25 @@
 import sys
+from io import StringIO

 import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
 from scipy.linalg import block_diag
 from scipy.sparse import csr_matrix
 from scipy.special import psi
-from numpy.testing import assert_array_equal
-
-import pytest

 from sklearn.decomposition import LatentDirichletAllocation
 from sklearn.decomposition._online_lda_fast import (
     _dirichlet_expectation_1d,
     _dirichlet_expectation_2d,
 )
-
-from sklearn.utils._testing import assert_allclose
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import if_safe_multiprocessing_with_blas
-
 from sklearn.exceptions import NotFittedError
-from io import StringIO
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_almost_equal,
+    assert_array_almost_equal,
+    if_safe_multiprocessing_with_blas,
+)


 def _build_sparse_mtx():
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 5bf893f92fd16..0176ebd0be9e7 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -1,17 +1,15 @@
+import warnings
+
 import numpy as np
+import pytest
 import scipy as sp
 from numpy.testing import assert_array_equal
-import pytest
-import warnings
-
-from sklearn.utils._testing import assert_allclose
-
 from sklearn import datasets
-from sklearn.decomposition import PCA
 from sklearn.datasets import load_iris
-from sklearn.decomposition._pca import _assess_dimension
-from sklearn.decomposition._pca import _infer_dimension
+from sklearn.decomposition import PCA
+from sklearn.decomposition._pca import _assess_dimension, _infer_dimension
+from sklearn.utils._testing import assert_allclose

 iris = datasets.load_iris()
"randomized", "auto"] diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index cf237014c6049..4abbbd515aeb9 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -2,17 +2,18 @@ # License: BSD 3 clause import sys -import pytest import numpy as np +import pytest from numpy.testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import if_safe_multiprocessing_with_blas - -from sklearn.decomposition import SparsePCA, MiniBatchSparsePCA, PCA +from sklearn.decomposition import PCA, MiniBatchSparsePCA, SparsePCA from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + if_safe_multiprocessing_with_blas, +) def generate_toy_data(n_components, n_samples, image_size, random_state=None): diff --git a/sklearn/decomposition/tests/test_truncated_svd.py b/sklearn/decomposition/tests/test_truncated_svd.py index bd0bde6e08aa7..4edb7d4a11109 100644 --- a/sklearn/decomposition/tests/test_truncated_svd.py +++ b/sklearn/decomposition/tests/test_truncated_svd.py @@ -1,13 +1,12 @@ """Test truncated SVD transformer.""" import numpy as np -import scipy.sparse as sp - import pytest +import scipy.sparse as sp -from sklearn.decomposition import TruncatedSVD, PCA +from sklearn.decomposition import PCA, TruncatedSVD from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_less, assert_allclose +from sklearn.utils._testing import assert_allclose, assert_array_less SVD_SOLVERS = ["arpack", "randomized"] diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 275f4ae4d3b30..29146ca857694 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -10,24 +10,27 @@ # License: BSD 3-Clause import warnings +from numbers import Integral, Real + import numpy as np import scipy.linalg from scipy import linalg -from numbers import Real, Integral -from .base import BaseEstimator, TransformerMixin, ClassifierMixin -from .base import ClassNamePrefixFeaturesOutMixin -from .base import _fit_context +from .base import ( + BaseEstimator, + ClassifierMixin, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from .covariance import empirical_covariance, ledoit_wolf, shrunk_covariance from .linear_model._base import LinearClassifierMixin -from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance -from .utils.multiclass import unique_labels -from .utils.validation import check_is_fitted -from .utils._array_api import get_namespace, _expit, device, size -from .utils.multiclass import check_classification_targets -from .utils.extmath import softmax -from .utils._param_validation import StrOptions, Interval, HasMethods from .preprocessing import StandardScaler - +from .utils._array_api import _expit, device, get_namespace, size +from .utils._param_validation import HasMethods, Interval, StrOptions +from .utils.extmath import softmax +from .utils.multiclass import check_classification_targets, unique_labels +from .utils.validation import check_is_fitted __all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"] diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 0d8519484d7a5..1db664826f5c9 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -9,18 +9,25 @@ import numpy as np 
import scipy.sparse as sp -from .base import BaseEstimator, ClassifierMixin, RegressorMixin -from .base import MultiOutputMixin -from .base import _fit_context +from .base import ( + BaseEstimator, + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + _fit_context, +) from .utils import check_random_state -from .utils._param_validation import StrOptions, Interval -from .utils.validation import _num_samples -from .utils.validation import check_array -from .utils.validation import check_consistent_length -from .utils.validation import check_is_fitted, _check_sample_weight +from .utils._param_validation import Interval, StrOptions +from .utils.multiclass import class_distribution from .utils.random import _random_choice_csc from .utils.stats import _weighted_percentile -from .utils.multiclass import class_distribution +from .utils.validation import ( + _check_sample_weight, + _num_samples, + check_array, + check_consistent_length, + check_is_fitted, +) class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator): diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py index e892d36a0ce46..f4a3756bdaf1d 100644 --- a/sklearn/ensemble/__init__.py +++ b/sklearn/ensemble/__init__.py @@ -2,27 +2,24 @@ The :mod:`sklearn.ensemble` module includes ensemble-based methods for classification, regression and anomaly detection. """ +from ._bagging import BaggingClassifier, BaggingRegressor from ._base import BaseEnsemble -from ._forest import RandomForestClassifier -from ._forest import RandomForestRegressor -from ._forest import RandomTreesEmbedding -from ._forest import ExtraTreesClassifier -from ._forest import ExtraTreesRegressor -from ._bagging import BaggingClassifier -from ._bagging import BaggingRegressor -from ._iforest import IsolationForest -from ._weight_boosting import AdaBoostClassifier -from ._weight_boosting import AdaBoostRegressor -from ._gb import GradientBoostingClassifier -from ._gb import GradientBoostingRegressor -from ._voting import VotingClassifier -from ._voting import VotingRegressor -from ._stacking import StackingClassifier -from ._stacking import StackingRegressor +from ._forest import ( + ExtraTreesClassifier, + ExtraTreesRegressor, + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, +) +from ._gb import GradientBoostingClassifier, GradientBoostingRegressor from ._hist_gradient_boosting.gradient_boosting import ( - HistGradientBoostingRegressor, HistGradientBoostingClassifier, + HistGradientBoostingRegressor, ) +from ._iforest import IsolationForest +from ._stacking import StackingClassifier, StackingRegressor +from ._voting import VotingClassifier, VotingRegressor +from ._weight_boosting import AdaBoostClassifier, AdaBoostRegressor __all__ = [ "BaseEnsemble", diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 0354413fdebfe..117bf470c509f 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -6,28 +6,25 @@ import itertools import numbers -import numpy as np from abc import ABCMeta, abstractmethod +from functools import partial from numbers import Integral from warnings import warn -from functools import partial -from ._base import BaseEnsemble, _partition_estimators -from ..base import ClassifierMixin, RegressorMixin -from ..base import _fit_context -from ..metrics import r2_score, accuracy_score +import numpy as np + +from ..base import ClassifierMixin, RegressorMixin, _fit_context +from ..metrics import accuracy_score, r2_score from ..tree import 
DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import check_random_state, column_or_1d -from ..utils import indices_to_mask +from ..utils import check_random_state, column_or_1d, indices_to_mask +from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions +from ..utils._tags import _safe_tags from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets +from ..utils.parallel import Parallel, delayed from ..utils.random import sample_without_replacement -from ..utils._param_validation import Interval, HasMethods, StrOptions -from ..utils._param_validation import RealNotInt -from ..utils.validation import has_fit_parameter, check_is_fitted, _check_sample_weight -from ..utils._tags import _safe_tags -from ..utils.parallel import delayed, Parallel - +from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter +from ._base import BaseEnsemble, _partition_estimators __all__ = ["BaggingClassifier", "BaggingRegressor"] diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py index 3850fa724f11a..3107b4cf9a6c5 100644 --- a/sklearn/ensemble/_base.py +++ b/sklearn/ensemble/_base.py @@ -3,20 +3,15 @@ # Authors: Gilles Louppe # License: BSD 3 clause +import warnings from abc import ABCMeta, abstractmethod from typing import List -import warnings import numpy as np - from joblib import effective_n_jobs -from ..base import clone -from ..base import is_classifier, is_regressor -from ..base import BaseEstimator -from ..base import MetaEstimatorMixin -from ..utils import Bunch, _print_elapsed_time, deprecated -from ..utils import check_random_state +from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier, is_regressor +from ..utils import Bunch, _print_elapsed_time, check_random_state, deprecated from ..utils.metaestimators import _BaseComposition diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index ce3a6f78b241d..df8ecc974dd34 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -40,19 +40,24 @@ class calls the ``fit`` method of each sub-estimator on random samples # License: BSD 3 clause +import threading +from abc import ABCMeta, abstractmethod from numbers import Integral, Real from warnings import catch_warnings, simplefilter, warn -import threading -from abc import ABCMeta, abstractmethod import numpy as np -from scipy.sparse import issparse from scipy.sparse import hstack as sparse_hstack +from scipy.sparse import issparse -from ..base import is_classifier -from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin -from ..base import _fit_context - +from ..base import ( + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + TransformerMixin, + _fit_context, + is_classifier, +) +from ..exceptions import DataConversionWarning from ..metrics import accuracy_score, r2_score from ..preprocessing import OneHotEncoder from ..tree import ( @@ -62,21 +67,18 @@ class calls the ``fit`` method of each sub-estimator on random samples ExtraTreeClassifier, ExtraTreeRegressor, ) -from ..tree._tree import DTYPE, DOUBLE +from ..tree._tree import DOUBLE, DTYPE from ..utils import check_random_state, compute_sample_weight -from ..exceptions import DataConversionWarning -from ._base import BaseEnsemble, _partition_estimators -from ..utils.parallel import delayed, Parallel +from ..utils._param_validation import Interval, RealNotInt, StrOptions from ..utils.multiclass import check_classification_targets, 
type_of_target +from ..utils.parallel import Parallel, delayed from ..utils.validation import ( - check_is_fitted, - _check_sample_weight, _check_feature_names_in, + _check_sample_weight, + _num_samples, + check_is_fitted, ) -from ..utils.validation import _num_samples -from ..utils._param_validation import Interval, StrOptions -from ..utils._param_validation import RealNotInt - +from ._base import BaseEnsemble, _partition_estimators __all__ = [ "RandomForestClassifier", diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 1b924749f52bd..777e1a18d8396 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -20,37 +20,26 @@ # Arnaud Joly, Jacob Schreiber # License: BSD 3 clause -from abc import ABCMeta -from abc import abstractmethod -from numbers import Integral, Real import warnings - -from ._base import BaseEnsemble -from ..base import ClassifierMixin, RegressorMixin -from ..base import is_classifier -from ..base import _fit_context - -from ._gradient_boosting import predict_stages -from ._gradient_boosting import predict_stage -from ._gradient_boosting import _random_sample_mask +from abc import ABCMeta, abstractmethod +from numbers import Integral, Real +from time import time import numpy as np +from scipy.sparse import csc_matrix, csr_matrix, issparse -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import issparse - -from time import time +from ..base import ClassifierMixin, RegressorMixin, _fit_context, is_classifier +from ..exceptions import NotFittedError from ..model_selection import train_test_split from ..tree import DecisionTreeRegressor -from ..tree._tree import DTYPE, DOUBLE -from . import _gb_losses - +from ..tree._tree import DOUBLE, DTYPE from ..utils import check_array, check_random_state, column_or_1d from ..utils._param_validation import HasMethods, Interval, StrOptions -from ..utils.validation import check_is_fitted, _check_sample_weight from ..utils.multiclass import check_classification_targets -from ..exceptions import NotFittedError +from ..utils.validation import _check_sample_weight, check_is_fitted +from . import _gb_losses +from ._base import BaseEnsemble +from ._gradient_boosting import _random_sample_mask, predict_stage, predict_stages class VerboseReporter: diff --git a/sklearn/ensemble/_gb_losses.py b/sklearn/ensemble/_gb_losses.py index db2116d9aa2e1..7fb7e4726c325 100644 --- a/sklearn/ensemble/_gb_losses.py +++ b/sklearn/ensemble/_gb_losses.py @@ -2,16 +2,14 @@ decision trees. 
""" -from abc import ABCMeta -from abc import abstractmethod +from abc import ABCMeta, abstractmethod import numpy as np from scipy.special import expit, logsumexp +from ..dummy import DummyClassifier, DummyRegressor from ..tree._tree import TREE_LEAF from ..utils.stats import _weighted_percentile -from ..dummy import DummyClassifier -from ..dummy import DummyRegressor class LossFunction(metaclass=ABCMeta): diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index 805a13b2d361b..8786e866d7be3 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -9,14 +9,14 @@ import numpy as np -from ...utils import check_random_state, check_array from ...base import BaseEstimator, TransformerMixin -from ...utils.validation import check_is_fitted -from ...utils.fixes import percentile +from ...utils import check_array, check_random_state from ...utils._openmp_helpers import _openmp_effective_n_threads +from ...utils.fixes import percentile +from ...utils.validation import check_is_fitted from ._binning import _map_to_bins -from .common import X_DTYPE, X_BINNED_DTYPE, ALMOST_INF, X_BITSET_INNER_DTYPE from ._bitset import set_bitset_memoryview +from .common import ALMOST_INF, X_BINNED_DTYPE, X_BITSET_INNER_DTYPE, X_DTYPE def _find_binning_thresholds(col_data, max_bins): diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index e44b6428f8f4e..136e8c3b29efe 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1,13 +1,14 @@ """Fast Gradient Boosting decision trees for classification and regression.""" # Author: Nicolas Hug +import itertools from abc import ABC, abstractmethod from functools import partial -import itertools -from numbers import Real, Integral +from numbers import Integral, Real +from timeit import default_timer as time import numpy as np -from timeit import default_timer as time + from ..._loss.loss import ( _LOSSES, BaseLoss, @@ -17,29 +18,31 @@ HalfPoissonLoss, PinballLoss, ) -from ...base import BaseEstimator, RegressorMixin, ClassifierMixin, is_classifier -from ...base import _fit_context -from ...utils import check_random_state, resample, compute_sample_weight -from ...utils.validation import ( - check_is_fitted, - check_consistent_length, - _check_sample_weight, - _check_monotonic_cst, +from ...base import ( + BaseEstimator, + ClassifierMixin, + RegressorMixin, + _fit_context, + is_classifier, ) -from ...utils._param_validation import Interval, StrOptions -from ...utils._param_validation import RealNotInt -from ...utils._openmp_helpers import _openmp_effective_n_threads -from ...utils.multiclass import check_classification_targets from ...metrics import check_scoring from ...model_selection import train_test_split from ...preprocessing import LabelEncoder +from ...utils import check_random_state, compute_sample_weight, resample +from ...utils._openmp_helpers import _openmp_effective_n_threads +from ...utils._param_validation import Interval, RealNotInt, StrOptions +from ...utils.multiclass import check_classification_targets +from ...utils.validation import ( + _check_monotonic_cst, + _check_sample_weight, + check_consistent_length, + check_is_fitted, +) from ._gradient_boosting import _update_raw_predictions -from .common import Y_DTYPE, X_DTYPE, G_H_DTYPE - from .binning 
import _BinMapper +from .common import G_H_DTYPE, X_DTYPE, Y_DTYPE from .grower import TreeGrower - _LOSSES = _LOSSES.copy() _LOSSES.update( { diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index b8c0c17969e99..4ed6041ecaa30 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -6,22 +6,25 @@ """ # Author: Nicolas Hug -from heapq import heappush, heappop -import numpy as np -from timeit import default_timer as time import numbers +from heapq import heappop, heappush +from timeit import default_timer as time -from .splitting import Splitter +import numpy as np + +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +from ._bitset import set_raw_bitset_from_binned_bitset +from .common import ( + PREDICTOR_RECORD_DTYPE, + X_BITSET_INNER_DTYPE, + Y_DTYPE, + MonotonicConstraint, +) from .histogram import HistogramBuilder from .predictor import TreePredictor +from .splitting import Splitter from .utils import sum_parallel -from .common import PREDICTOR_RECORD_DTYPE -from .common import X_BITSET_INNER_DTYPE -from .common import Y_DTYPE -from .common import MonotonicConstraint -from ._bitset import set_raw_bitset_from_binned_bitset -from sklearn.utils._openmp_helpers import _openmp_effective_n_threads - EPS = np.finfo(Y_DTYPE).eps # to avoid zero division errors diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index 746fa34753121..600e55e43467f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -5,10 +5,12 @@ import numpy as np +from ._predictor import ( + _compute_partial_dependence, + _predict_from_binned_data, + _predict_from_raw_data, +) from .common import Y_DTYPE -from ._predictor import _predict_from_raw_data -from ._predictor import _predict_from_binned_data -from ._predictor import _compute_partial_dependence class TreePredictor: diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py index 08bfebfcbf6c9..6f9fcd0057141 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py @@ -1,15 +1,17 @@ import numpy as np -from numpy.testing import assert_array_equal, assert_allclose import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn.ensemble._hist_gradient_boosting.binning import ( _BinMapper, _find_binning_thresholds, _map_to_bins, ) -from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF +from sklearn.ensemble._hist_gradient_boosting.common import ( + ALMOST_INF, + X_BINNED_DTYPE, + X_DTYPE, +) from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py index e058781cefcef..c02d66b666f80 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py @@ -1,10 +1,10 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose 
from sklearn.ensemble._hist_gradient_boosting._bitset import ( - set_bitset_memoryview, in_bitset_memoryview, + set_bitset_memoryview, set_raw_bitset_from_binned_bitset, ) from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py index 6bd5b38d5a4ee..bbdcb38ef013a 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py @@ -1,13 +1,15 @@ -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score -from sklearn.datasets import make_classification, make_regression import numpy as np import pytest -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split @pytest.mark.parametrize("seed", range(5)) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 94d8960b6e813..4851c8e129203 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1,36 +1,35 @@ +import re import warnings -import re import numpy as np import pytest from numpy.testing import assert_allclose, assert_array_equal + from sklearn._loss.loss import ( AbsoluteError, HalfBinomialLoss, HalfSquaredError, PinballLoss, ) -from sklearn.datasets import make_classification, make_regression -from sklearn.datasets import make_low_rank_matrix -from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder -from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.base import clone, BaseEstimator, TransformerMixin -from sklearn.base import is_regressor -from sklearn.pipeline import make_pipeline -from sklearn.metrics import mean_gamma_deviance, mean_poisson_deviance -from sklearn.dummy import DummyRegressor -from sklearn.exceptions import NotFittedError +from sklearn.base import BaseEstimator, TransformerMixin, clone, is_regressor from sklearn.compose import make_column_transformer - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.datasets import make_classification, make_low_rank_matrix, make_regression +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.exceptions import NotFittedError +from sklearn.metrics import mean_gamma_deviance, mean_poisson_deviance +from sklearn.model_selection import cross_val_score, train_test_split 
+from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder from sklearn.utils import shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads - n_threads = _openmp_effective_n_threads() X_classification, y_classification = make_classification(random_state=0) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py index f3380fbf2af6d..a55cb871e3c72 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py @@ -1,17 +1,18 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal from pytest import approx -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose -from sklearn.preprocessing import OneHotEncoder -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + X_BINNED_DTYPE, + X_BITSET_INNER_DTYPE, + X_DTYPE, + Y_DTYPE, +) +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.preprocessing import OneHotEncoder from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py index 1d5963d20739b..99f74b0f542ee 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py @@ -1,20 +1,20 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal -from numpy.testing import assert_allclose -from numpy.testing import assert_array_equal - +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + HISTOGRAM_DTYPE, + X_BINNED_DTYPE, +) from sklearn.ensemble._hist_gradient_boosting.histogram import ( - _build_histogram_naive, _build_histogram, + _build_histogram_naive, _build_histogram_no_hessian, - _build_histogram_root_no_hessian, _build_histogram_root, + _build_histogram_root_no_hessian, _subtract_histograms, ) -from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE @pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram]) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py index f11bec3bd77db..7782b5b32eb68 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py @@ -1,18 +1,23 @@ import re + import numpy as np import pytest +from sklearn.ensemble import ( + 
HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + X_BINNED_DTYPE, + MonotonicConstraint, +) from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint +from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.ensemble._hist_gradient_boosting.splitting import ( Splitter, compute_node_value, ) -from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.utils._openmp_helpers import _openmp_effective_n_threads from sklearn.utils._testing import _convert_container diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py index 856ab180459d2..3c3c9ae81bac2 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py @@ -1,25 +1,25 @@ import numpy as np -from numpy.testing import assert_allclose -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split -from sklearn.metrics import r2_score import pytest +from numpy.testing import assert_allclose +from sklearn.datasets import make_regression +from sklearn.ensemble._hist_gradient_boosting._bitset import ( + set_bitset_memoryview, + set_raw_bitset_from_binned_bitset, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower -from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor from sklearn.ensemble._hist_gradient_boosting.common import ( + ALMOST_INF, G_H_DTYPE, PREDICTOR_RECORD_DTYPE, - ALMOST_INF, X_BINNED_DTYPE, X_BITSET_INNER_DTYPE, X_DTYPE, ) -from sklearn.ensemble._hist_gradient_boosting._bitset import ( - set_bitset_memoryview, - set_raw_bitset_from_binned_bitset, -) +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py index 255d13bb08456..f862273beadf5 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py @@ -2,17 +2,19 @@ import pytest from numpy.testing import assert_array_equal -from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + HISTOGRAM_DTYPE, + X_BINNED_DTYPE, + MonotonicConstraint, +) +from 
sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.ensemble._hist_gradient_boosting.splitting import ( Splitter, compute_node_value, ) -from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder -from sklearn.utils._testing import skip_if_32bit from sklearn.utils._openmp_helpers import _openmp_effective_n_threads +from sklearn.utils._testing import skip_if_32bit n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index f8d7533ec38bc..03a2720b36127 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -1,17 +1,15 @@ import numpy as np -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose - import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn.base import clone from sklearn.datasets import make_classification, make_regression - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.metrics import check_scoring - X_classification, y_classification = make_classification(random_state=0) X_regression, y_regression = make_regression(random_state=0) diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py index 048a1d69395e2..9371d2e4e6c5b 100644 --- a/sklearn/ensemble/_iforest.py +++ b/sklearn/ensemble/_iforest.py @@ -3,25 +3,23 @@ # License: BSD 3 clause import numbers +from numbers import Integral, Real +from warnings import warn + import numpy as np from scipy.sparse import issparse -from warnings import warn -from numbers import Integral, Real +from ..base import OutlierMixin, _fit_context from ..tree import ExtraTreeRegressor from ..tree._tree import DTYPE as tree_dtype from ..utils import ( - check_random_state, check_array, + check_random_state, gen_batches, get_chunk_n_rows, ) -from ..utils._param_validation import Interval, StrOptions -from ..utils._param_validation import RealNotInt -from ..utils.validation import check_is_fitted, _num_samples -from ..base import OutlierMixin -from ..base import _fit_context - +from ..utils._param_validation import Interval, RealNotInt, StrOptions +from ..utils.validation import _num_samples, check_is_fitted from ._bagging import BaseBagging __all__ = ["IsolationForest"] diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 5b3486edfeb33..2129e4d9a0134 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -10,35 +10,32 @@ import numpy as np import scipy.sparse as sparse -from ..base import clone -from ..base import ClassifierMixin, RegressorMixin, TransformerMixin -from ..base import is_classifier, is_regressor -from ..base import _fit_context +from ..base import ( + ClassifierMixin, + RegressorMixin, + TransformerMixin, + _fit_context, + clone, + is_classifier, + is_regressor, +) from ..exceptions import NotFittedError -from ..utils._estimator_html_repr import _VisualBlock - -from ._base import _fit_single_estimator -from ._base import _BaseHeterogeneousEnsemble - -from ..linear_model import LogisticRegression -from ..linear_model import RidgeCV - -from ..model_selection import cross_val_predict -from ..model_selection import check_cv - +from 
..linear_model import LogisticRegression, RidgeCV +from ..model_selection import check_cv, cross_val_predict from ..preprocessing import LabelEncoder - from ..utils import Bunch -from ..utils.multiclass import check_classification_targets, type_of_target -from ..utils.metaestimators import available_if -from ..utils.parallel import delayed, Parallel +from ..utils._estimator_html_repr import _VisualBlock from ..utils._param_validation import HasMethods, StrOptions +from ..utils.metaestimators import available_if +from ..utils.multiclass import check_classification_targets, type_of_target +from ..utils.parallel import Parallel, delayed from ..utils.validation import ( _check_feature_names_in, _check_response_method, check_is_fitted, column_or_1d, ) +from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator def _estimator_has(attr): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index f8f4d2c4c197f..50670a5a52699 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -18,24 +18,23 @@ import numpy as np -from ..base import ClassifierMixin -from ..base import RegressorMixin -from ..base import TransformerMixin -from ..base import clone -from ..base import _fit_context -from ._base import _fit_single_estimator -from ._base import _BaseHeterogeneousEnsemble +from ..base import ( + ClassifierMixin, + RegressorMixin, + TransformerMixin, + _fit_context, + clone, +) +from ..exceptions import NotFittedError from ..preprocessing import LabelEncoder from ..utils import Bunch +from ..utils._estimator_html_repr import _VisualBlock +from ..utils._param_validation import StrOptions from ..utils.metaestimators import available_if -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_feature_names_in from ..utils.multiclass import check_classification_targets -from ..utils.validation import column_or_1d -from ..utils._param_validation import StrOptions -from ..exceptions import NotFittedError -from ..utils._estimator_html_repr import _VisualBlock -from ..utils.parallel import delayed, Parallel +from ..utils.parallel import Parallel, delayed +from ..utils.validation import _check_feature_names_in, check_is_fitted, column_or_1d +from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 569609e6326e5..4beee0f09e6f2 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -23,28 +23,32 @@ # # License: BSD 3 clause +import warnings from abc import ABCMeta, abstractmethod - from numbers import Integral, Real -import numpy as np - -import warnings +import numpy as np from scipy.special import xlogy -from ._base import BaseEnsemble -from ..base import ClassifierMixin, RegressorMixin, is_classifier, is_regressor -from ..base import _fit_context -from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import check_random_state, _safe_indexing -from ..utils.extmath import softmax -from ..utils.extmath import stable_cumsum +from ..base import ( + ClassifierMixin, + RegressorMixin, + _fit_context, + is_classifier, + is_regressor, +) from ..metrics import accuracy_score, r2_score -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..utils.validation import has_fit_parameter -from ..utils.validation import _num_samples +from ..tree import 
DecisionTreeClassifier, DecisionTreeRegressor +from ..utils import _safe_indexing, check_random_state from ..utils._param_validation import HasMethods, Interval, StrOptions +from ..utils.extmath import softmax, stable_cumsum +from ..utils.validation import ( + _check_sample_weight, + _num_samples, + check_is_fitted, + has_fit_parameter, +) +from ._base import BaseEnsemble __all__ = [ "AdaBoostClassifier", diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index f6311e8c459d4..2c1067ccfc248 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -4,35 +4,33 @@ # Author: Gilles Louppe # License: BSD 3 clause -from itertools import product +from itertools import cycle, product -import numpy as np import joblib +import numpy as np import pytest +from scipy.sparse import csc_matrix, csr_matrix from sklearn.base import BaseEstimator - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal +from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.model_selection import GridSearchCV, ParameterGrid -from sklearn.ensemble import BaggingClassifier, BaggingRegressor -from sklearn.linear_model import Perceptron, LogisticRegression +from sklearn.ensemble import ( + BaggingClassifier, + BaggingRegressor, + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.feature_selection import SelectKBest +from sklearn.linear_model import LogisticRegression, Perceptron +from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.svm import SVC, SVR -from sklearn.random_projection import SparseRandomProjection from sklearn.pipeline import make_pipeline -from sklearn.feature_selection import SelectKBest -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 -from sklearn.utils import check_random_state from sklearn.preprocessing import FunctionTransformer, scale -from itertools import cycle - -from scipy.sparse import csc_matrix, csr_matrix +from sklearn.random_projection import SparseRandomProjection +from sklearn.svm import SVC, SVR +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal rng = check_random_state(0) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index fe4b1e33ae7b3..8687d91053a22 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -5,19 +5,19 @@ # Authors: Gilles Louppe # License: BSD 3 clause +from collections import OrderedDict + import numpy as np import pytest +from sklearn import ensemble from sklearn.datasets import load_iris +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.ensemble import BaggingClassifier from sklearn.ensemble._base import _set_random_states -from sklearn.linear_model import Perceptron -from sklearn.linear_model import Ridge, LogisticRegression -from collections import OrderedDict -from 
sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.pipeline import Pipeline from sklearn.feature_selection import SelectFromModel -from sklearn import ensemble +from sklearn.linear_model import LogisticRegression, Perceptron, Ridge +from sklearn.pipeline import Pipeline def test_base(): diff --git a/sklearn/ensemble/tests/test_common.py b/sklearn/ensemble/tests/test_common.py index 5bafe08881ae9..7e14b34993d6f 100644 --- a/sklearn/ensemble/tests/test_common.py +++ b/sklearn/ensemble/tests/test_common.py @@ -1,21 +1,25 @@ import numpy as np import pytest -from sklearn.base import clone -from sklearn.base import ClassifierMixin -from sklearn.base import is_classifier - -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression -from sklearn.datasets import load_iris, load_diabetes +from sklearn.base import ClassifierMixin, clone, is_classifier +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + StackingClassifier, + StackingRegressor, + VotingClassifier, + VotingRegressor, +) from sklearn.impute import SimpleImputer -from sklearn.linear_model import LogisticRegression, LinearRegression -from sklearn.svm import LinearSVC, LinearSVR, SVC, SVR +from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.pipeline import make_pipeline -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor - -from sklearn.ensemble import StackingClassifier, StackingRegressor -from sklearn.ensemble import VotingClassifier, VotingRegressor +from sklearn.svm import SVC, SVR, LinearSVC, LinearSVR X, y = load_iris(return_X_y=True) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 9ee29f717af88..15d2999b5ef4d 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -8,58 +8,54 @@ # Arnaud Joly # License: BSD 3 clause -import pickle +import itertools import math +import pickle from collections import defaultdict from functools import partial -import itertools -from itertools import combinations -from itertools import product -from typing import Dict, Any - -import numpy as np -from scipy.sparse import csr_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import coo_matrix -from scipy.special import comb +from itertools import combinations, product +from typing import Any, Dict +from unittest.mock import patch import joblib - +import numpy as np import pytest +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix +from scipy.special import comb import sklearn -from sklearn.dummy import DummyRegressor -from sklearn.metrics import mean_poisson_deviance -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import _convert_container -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import skip_if_no_parallel - -from sklearn.exceptions import NotFittedError - from sklearn import datasets -from sklearn.decomposition import TruncatedSVD from sklearn.datasets import make_classification -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import ExtraTreesRegressor -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor -from 
sklearn.ensemble import RandomTreesEmbedding -from sklearn.metrics import explained_variance_score, f1_score -from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.model_selection import GridSearchCV +from sklearn.decomposition import TruncatedSVD +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + ExtraTreesClassifier, + ExtraTreesRegressor, + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, +) +from sklearn.exceptions import NotFittedError +from sklearn.metrics import ( + explained_variance_score, + f1_score, + mean_poisson_deviance, + mean_squared_error, +) +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.svm import LinearSVC +from sklearn.tree._classes import SPARSE_SPLITTERS +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, + skip_if_no_parallel, +) from sklearn.utils.parallel import Parallel from sklearn.utils.validation import check_random_state -from sklearn.metrics import mean_squared_error - -from sklearn.tree._classes import SPARSE_SPLITTERS - -from unittest.mock import patch - # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y = [-1, -1, -1, 1, 1, 1] diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index ad31b2ed732e9..f46bf9959fa29 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -3,38 +3,34 @@ """ import re import warnings + import numpy as np +import pytest from numpy.testing import assert_allclose - -from scipy.sparse import csr_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import coo_matrix +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix from scipy.special import expit -import pytest - from sklearn import datasets from sklearn.base import clone from sklearn.datasets import make_classification, make_regression -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.ensemble import GradientBoostingRegressor +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor from sklearn.ensemble._gradient_boosting import predict_stages -from sklearn.preprocessing import scale +from sklearn.exceptions import DataConversionWarning, NotFittedError +from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import scale +from sklearn.svm import NuSVR from sklearn.utils import check_random_state, tosequence from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import skip_if_32bit from sklearn.utils._param_validation import InvalidParameterError -from sklearn.exceptions import DataConversionWarning -from sklearn.exceptions import NotFittedError -from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.pipeline import make_pipeline -from sklearn.linear_model import LinearRegression -from sklearn.svm import NuSVR - +from sklearn.utils._testing import ( + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) GRADIENT_BOOSTING_ESTIMATORS = 
[GradientBoostingClassifier, GradientBoostingRegressor] @@ -674,9 +670,8 @@ def test_oob_multilcass_iris(): def test_verbose_output(): # Check verbose=1 does not cause error. - from io import StringIO - import sys + from io import StringIO old_stdout = sys.stdout sys.stdout = StringIO() @@ -706,8 +701,8 @@ def test_verbose_output(): def test_more_verbose_output(): # Check verbose=2 does not cause error. - from io import StringIO import sys + from io import StringIO old_stdout = sys.stdout sys.stdout = StringIO() diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index e710be9504be3..df92c68801da2 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -2,22 +2,25 @@ Testing for the gradient boosting loss functions and initial estimators. """ from itertools import product + import numpy as np -from numpy.testing import assert_allclose import pytest +from numpy.testing import assert_allclose from pytest import approx -from sklearn.utils import check_random_state +from sklearn.ensemble._gb_losses import ( + LOSS_FUNCTIONS, + BinomialDeviance, + ExponentialLoss, + HuberLossFunction, + LeastAbsoluteError, + LeastSquaresError, + MultinomialDeviance, + QuantileLossFunction, + RegressionLossFunction, +) from sklearn.metrics import mean_pinball_loss -from sklearn.ensemble._gb_losses import RegressionLossFunction -from sklearn.ensemble._gb_losses import LeastSquaresError -from sklearn.ensemble._gb_losses import LeastAbsoluteError -from sklearn.ensemble._gb_losses import HuberLossFunction -from sklearn.ensemble._gb_losses import QuantileLossFunction -from sklearn.ensemble._gb_losses import BinomialDeviance -from sklearn.ensemble._gb_losses import MultinomialDeviance -from sklearn.ensemble._gb_losses import ExponentialLoss -from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS +from sklearn.utils import check_random_state def test_binomial_deviance(): diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 7650dd5c14ce4..854ebdb701014 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -6,27 +6,25 @@ # Alexandre Gramfort # License: BSD 3 clause -import pytest import warnings +from unittest.mock import Mock, patch import numpy as np +import pytest +from scipy.sparse import csc_matrix, csr_matrix -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import assert_allclose - -from sklearn.model_selection import ParameterGrid +from sklearn.datasets import load_diabetes, load_iris, make_classification from sklearn.ensemble import IsolationForest from sklearn.ensemble._iforest import _average_path_length -from sklearn.model_selection import train_test_split -from sklearn.datasets import load_diabetes, load_iris, make_classification -from sklearn.utils import check_random_state from sklearn.metrics import roc_auc_score - -from scipy.sparse import csc_matrix, csr_matrix -from unittest.mock import Mock, patch - +from sklearn.model_selection import ParameterGrid, train_test_split +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) # load iris & diabetes dataset iris = 
load_iris() diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index 2c04171fcd0f4..006b9cdb9e966 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -3,55 +3,47 @@ # Authors: Guillaume Lemaitre # License: BSD 3 clause -import pytest +from unittest.mock import Mock + import numpy as np -from numpy.testing import assert_array_equal +import pytest import scipy.sparse as sparse +from numpy.testing import assert_array_equal -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.base import RegressorMixin -from sklearn.base import clone - -from sklearn.exceptions import ConvergenceWarning - -from sklearn.datasets import load_iris -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_breast_cancer -from sklearn.datasets import make_regression -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification - -from sklearn.dummy import DummyClassifier -from sklearn.dummy import DummyRegressor -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import Ridge -from sklearn.linear_model import RidgeClassifier -from sklearn.svm import LinearSVC -from sklearn.svm import LinearSVR -from sklearn.svm import SVC -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor +from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone +from sklearn.datasets import ( + load_breast_cancer, + load_diabetes, + load_iris, + make_classification, + make_multilabel_classification, + make_regression, +) +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + StackingClassifier, + StackingRegressor, +) +from sklearn.exceptions import ConvergenceWarning, NotFittedError +from sklearn.linear_model import ( + LinearRegression, + LogisticRegression, + Ridge, + RidgeClassifier, +) +from sklearn.model_selection import KFold, StratifiedKFold, train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.neural_network import MLPClassifier from sklearn.preprocessing import scale - -from sklearn.ensemble import StackingClassifier -from sklearn.ensemble import StackingRegressor - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import KFold - +from sklearn.svm import SVC, LinearSVC, LinearSVR from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import ignore_warnings - -from sklearn.exceptions import NotFittedError - -from unittest.mock import Mock +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + ignore_warnings, +) diabetes = load_diabetes() X_diabetes, y_diabetes = diabetes.data, diabetes.target diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index 56db8b3c7fbf5..52734fc031fde 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -1,30 +1,34 @@ """Testing for the VotingClassifier and VotingRegressor""" -import pytest import re + import numpy as np +import pytest -from sklearn.utils._testing import assert_almost_equal, 
assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.exceptions import NotFittedError -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import GaussianNB -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import VotingClassifier, VotingRegressor -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree import DecisionTreeRegressor -from sklearn.model_selection import GridSearchCV from sklearn import datasets -from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.datasets import make_multilabel_classification -from sklearn.svm import SVC +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + VotingClassifier, + VotingRegressor, +) +from sklearn.exceptions import NotFittedError +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.multiclass import OneVsRestClassifier +from sklearn.naive_bayes import GaussianNB from sklearn.neighbors import KNeighborsClassifier -from sklearn.base import BaseEstimator, ClassifierMixin, clone -from sklearn.dummy import DummyRegressor from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) # Load datasets iris = datasets.load_iris() diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index a5b0f7a49ce47..a8e0f06340dc4 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -1,33 +1,27 @@ """Testing for the boost module (sklearn.ensemble.boost).""" -import numpy as np -import pytest import re -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import coo_matrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix - -from sklearn.utils._testing import assert_array_equal, assert_array_less -from sklearn.utils._testing import assert_array_almost_equal +import numpy as np +import pytest +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, dok_matrix, lil_matrix -from sklearn.base import BaseEstimator -from sklearn.base import clone +from sklearn import datasets +from sklearn.base import BaseEstimator, clone from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.linear_model import LinearRegression -from sklearn.model_selection import train_test_split -from sklearn.model_selection import GridSearchCV -from sklearn.ensemble import AdaBoostClassifier -from sklearn.ensemble import AdaBoostRegressor +from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor from sklearn.ensemble._weight_boosting import _samme_proba +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.svm import SVC, SVR from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import shuffle from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn import datasets - +from 
sklearn.utils._testing import ( + assert_array_almost_equal, + assert_array_equal, + assert_array_less, +) # Common random state rng = np.random.RandomState(0) diff --git a/sklearn/experimental/enable_halving_search_cv.py b/sklearn/experimental/enable_halving_search_cv.py index f6937b0d14c01..dd399ef35b6f7 100644 --- a/sklearn/experimental/enable_halving_search_cv.py +++ b/sklearn/experimental/enable_halving_search_cv.py @@ -19,13 +19,12 @@ flake8 to ignore the import, which appears as unused. """ +from .. import model_selection from ..model_selection._search_successive_halving import ( - HalvingRandomSearchCV, HalvingGridSearchCV, + HalvingRandomSearchCV, ) -from .. import model_selection - # use settattr to avoid mypy errors when monkeypatching setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV) setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV) diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index f0416ac013e96..d287400c7999f 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -12,7 +12,6 @@ import warnings - warnings.warn( "Since version 1.0, " "it is not needed to import enable_hist_gradient_boosting anymore. " diff --git a/sklearn/experimental/enable_iterative_imputer.py b/sklearn/experimental/enable_iterative_imputer.py index 9ef9f6a0dbdf0..0b906961ca184 100644 --- a/sklearn/experimental/enable_iterative_imputer.py +++ b/sklearn/experimental/enable_iterative_imputer.py @@ -12,8 +12,8 @@ >>> from sklearn.impute import IterativeImputer """ -from ..impute._iterative import IterativeImputer from .. import impute +from ..impute._iterative import IterativeImputer # use settattr to avoid mypy errors when monkeypatching setattr(impute, "IterativeImputer", IterativeImputer) diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py index a9c1496181b3b..f4db85303f4b6 100644 --- a/sklearn/feature_extraction/__init__.py +++ b/sklearn/feature_extraction/__init__.py @@ -4,10 +4,10 @@ images. """ +from . import text from ._dict_vectorizer import DictVectorizer from ._hash import FeatureHasher -from .image import img_to_graph, grid_to_graph -from . 
import text +from .image import grid_to_graph, img_to_graph __all__ = [ "DictVectorizer", diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 60e2cb3b7ad84..e32de4be42462 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -3,15 +3,14 @@ # License: BSD 3 clause from array import array -from collections.abc import Mapping, Iterable -from operator import itemgetter +from collections.abc import Iterable, Mapping from numbers import Number +from operator import itemgetter import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import check_array from ..utils.validation import check_is_fitted diff --git a/sklearn/feature_extraction/_hash.py b/sklearn/feature_extraction/_hash.py index e1b5e5f2561fe..e0941ed1dac97 100644 --- a/sklearn/feature_extraction/_hash.py +++ b/sklearn/feature_extraction/_hash.py @@ -1,16 +1,15 @@ # Author: Lars Buitinck # License: BSD 3 clause -from numbers import Integral from itertools import chain +from numbers import Integral import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context -from ._hashing_fast import transform as _hashing_transform +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils._param_validation import Interval, StrOptions +from ._hashing_fast import transform as _hashing_transform def _iteritems(d): diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py index beea3e23e0adc..da5d26b76e8ad 100644 --- a/sklearn/feature_extraction/image.py +++ b/sklearn/feature_extraction/image.py @@ -11,15 +11,14 @@ from itertools import product from numbers import Integral, Number, Real + import numpy as np -from scipy import sparse from numpy.lib.stride_tricks import as_strided +from scipy import sparse -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import check_array, check_random_state -from ..utils._param_validation import Hidden, Interval, validate_params -from ..utils._param_validation import RealNotInt +from ..utils._param_validation import Hidden, Interval, RealNotInt, validate_params __all__ = [ "PatchExtractor", diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index c8b9aaa8b5c8a..7e3c7f259ea03 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -3,12 +3,11 @@ # License: BSD 3 clause from random import Random -import numpy as np -import scipy.sparse as sp -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose +import numpy as np import pytest +import scipy.sparse as sp +from numpy.testing import assert_allclose, assert_array_equal from sklearn.feature_extraction import DictVectorizer from sklearn.feature_selection import SelectKBest, chi2 diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index b074620f8c029..945a7cb3ca8f9 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -1,6 +1,6 @@ import 
numpy as np -from numpy.testing import assert_array_equal import pytest +from numpy.testing import assert_array_equal from sklearn.feature_extraction import FeatureHasher from sklearn.feature_extraction._hashing_fast import transform as _hashing_transform diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 5a89062e7de19..375652c848db6 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -3,17 +3,17 @@ # License: BSD 3 clause import numpy as np +import pytest from scipy import ndimage from scipy.sparse.csgraph import connected_components -import pytest from sklearn.feature_extraction.image import ( - img_to_graph, - grid_to_graph, - extract_patches_2d, - reconstruct_from_patches_2d, PatchExtractor, _extract_patches, + extract_patches_2d, + grid_to_graph, + img_to_graph, + reconstruct_from_patches_2d, ) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 80a42aaea5af0..fc35053b40251 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -1,43 +1,37 @@ -from collections.abc import Mapping +import pickle import re +import warnings +from collections import defaultdict +from collections.abc import Mapping +from functools import partial +from io import StringIO +import numpy as np import pytest -import warnings +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy import sparse -from sklearn.feature_extraction.text import strip_tags -from sklearn.feature_extraction.text import strip_accents_unicode -from sklearn.feature_extraction.text import strip_accents_ascii - -from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_extraction.text import TfidfTransformer -from sklearn.feature_extraction.text import TfidfVectorizer - -from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import GridSearchCV +from sklearn.base import clone +from sklearn.feature_extraction.text import ( + ENGLISH_STOP_WORDS, + CountVectorizer, + HashingVectorizer, + TfidfTransformer, + TfidfVectorizer, + strip_accents_ascii, + strip_accents_unicode, + strip_tags, +) +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC - -from sklearn.base import clone - -import numpy as np -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal from sklearn.utils import IS_PYPY from sklearn.utils._testing import ( + assert_allclose_dense_sparse, assert_almost_equal, fails_if_pypy, - assert_allclose_dense_sparse, skip_if_32bit, ) -from collections import defaultdict -from functools import partial -import pickle -from io import StringIO JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 3201e3a0d51bb..4b4b4396d1863 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -12,29 +12,26 @@ """ import array +import re +import unicodedata +import warnings from collections import defaultdict from collections.abc import Mapping from functools import partial from numbers import 
Integral from operator import itemgetter -import re -import unicodedata -import warnings import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin -from ..base import _fit_context +from ..base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin, _fit_context +from ..exceptions import NotFittedError from ..preprocessing import normalize +from ..utils import _IS_32BIT +from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions +from ..utils.validation import FLOAT_DTYPES, check_array, check_is_fitted from ._hash import FeatureHasher from ._stop_words import ENGLISH_STOP_WORDS -from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES -from ..utils import _IS_32BIT -from ..exceptions import NotFittedError -from ..utils._param_validation import StrOptions, Interval, HasMethods -from ..utils._param_validation import RealNotInt - __all__ = [ "HashingVectorizer", diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index ce5fbc10ee459..4fbc631155078 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -4,31 +4,25 @@ recursive feature elimination algorithm. """ -from ._univariate_selection import chi2 -from ._univariate_selection import f_classif -from ._univariate_selection import f_oneway -from ._univariate_selection import f_regression -from ._univariate_selection import r_regression -from ._univariate_selection import SelectPercentile -from ._univariate_selection import SelectKBest -from ._univariate_selection import SelectFpr -from ._univariate_selection import SelectFdr -from ._univariate_selection import SelectFwe -from ._univariate_selection import GenericUnivariateSelect - -from ._variance_threshold import VarianceThreshold - -from ._rfe import RFE -from ._rfe import RFECV - +from ._base import SelectorMixin from ._from_model import SelectFromModel - +from ._mutual_info import mutual_info_classif, mutual_info_regression +from ._rfe import RFE, RFECV from ._sequential import SequentialFeatureSelector - -from ._mutual_info import mutual_info_regression, mutual_info_classif - -from ._base import SelectorMixin - +from ._univariate_selection import ( + GenericUnivariateSelect, + SelectFdr, + SelectFpr, + SelectFwe, + SelectKBest, + SelectPercentile, + chi2, + f_classif, + f_oneway, + f_regression, + r_regression, +) +from ._variance_threshold import VarianceThreshold __all__ = [ "GenericUnivariateSelect", diff --git a/sklearn/feature_selection/_base.py b/sklearn/feature_selection/_base.py index 100af272038ad..9ede37c98c75b 100644 --- a/sklearn/feature_selection/_base.py +++ b/sklearn/feature_selection/_base.py @@ -8,16 +8,16 @@ from operator import attrgetter import numpy as np -from scipy.sparse import issparse, csc_matrix +from scipy.sparse import csc_matrix, issparse from ..base import TransformerMixin from ..utils import ( + _safe_indexing, check_array, safe_sqr, ) -from ..utils._tags import _safe_tags -from ..utils import _safe_indexing from ..utils._set_output import _get_output_config +from ..utils._tags import _safe_tags from ..utils.validation import _check_feature_names_in, check_is_fitted diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py index 47f98d89e8abe..d3a287007bd49 100644 --- a/sklearn/feature_selection/_from_model.py +++ b/sklearn/feature_selection/_from_model.py @@ -2,20 +2,17 @@ # License: BSD 3 clause from copy import deepcopy - -import 
numpy as np from numbers import Integral, Real -from ._base import SelectorMixin -from ._base import _get_feature_importances -from ..base import BaseEstimator, clone, MetaEstimatorMixin -from ..base import _fit_context -from ..utils._tags import _safe_tags -from ..utils.validation import check_is_fitted, check_scalar, _num_features -from ..utils._param_validation import HasMethods, Interval, Options +import numpy as np +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone from ..exceptions import NotFittedError +from ..utils._param_validation import HasMethods, Interval, Options +from ..utils._tags import _safe_tags from ..utils.metaestimators import available_if +from ..utils.validation import _num_features, check_is_fitted, check_scalar +from ._base import SelectorMixin, _get_feature_importances def _calculate_threshold(estimator, importances, threshold): diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index 9cacfc3890784..b3de388c0811a 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -1,18 +1,19 @@ # Author: Nikolay Mayorov # License: 3-clause BSD -import numpy as np from numbers import Integral + +import numpy as np from scipy.sparse import issparse from scipy.special import digamma from ..metrics.cluster import mutual_info_score -from ..neighbors import NearestNeighbors, KDTree +from ..neighbors import KDTree, NearestNeighbors from ..preprocessing import scale from ..utils import check_random_state -from ..utils.validation import check_array, check_X_y -from ..utils.multiclass import check_classification_targets from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.multiclass import check_classification_targets +from ..utils.validation import check_array, check_X_y def _compute_mi_cc(x, y, n_neighbors): diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 932d66449ae22..11cf083992653 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -6,28 +6,21 @@ """Recursive feature elimination for feature ranking""" -import numpy as np from numbers import Integral -from joblib import effective_n_jobs +import numpy as np +from joblib import effective_n_jobs -from ..utils.metaestimators import available_if -from ..utils.metaestimators import _safe_split -from ..utils._param_validation import HasMethods, Interval -from ..utils._param_validation import RealNotInt -from ..utils._tags import _safe_tags -from ..utils.validation import check_is_fitted -from ..utils.parallel import delayed, Parallel -from ..base import BaseEstimator -from ..base import MetaEstimatorMixin -from ..base import clone -from ..base import is_classifier -from ..base import _fit_context +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier +from ..metrics import check_scoring from ..model_selection import check_cv from ..model_selection._validation import _score -from ..metrics import check_scoring -from ._base import SelectorMixin -from ._base import _get_feature_importances +from ..utils._param_validation import HasMethods, Interval, RealNotInt +from ..utils._tags import _safe_tags +from ..utils.metaestimators import _safe_split, available_if +from ..utils.parallel import Parallel, delayed +from ..utils.validation import check_is_fitted +from ._base import SelectorMixin, _get_feature_importances def _rfe_single_fit(rfe, estimator, X, y, train, test, 
scorer): diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 0fbe91273053b..78a1c86df49de 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -5,15 +5,13 @@ import numpy as np -from ._base import SelectorMixin -from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier -from ..base import _fit_context -from ..utils._param_validation import HasMethods, Interval, StrOptions -from ..utils._param_validation import RealNotInt +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier +from ..metrics import get_scorer_names +from ..model_selection import check_cv, cross_val_score +from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions from ..utils._tags import _safe_tags from ..utils.validation import check_is_fitted -from ..model_selection import cross_val_score, check_cv -from ..metrics import get_scorer_names +from ._base import SelectorMixin class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator): diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index f4355c39f88cd..fc1fcbc01a151 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -5,20 +5,19 @@ # License: BSD 3 clause -import numpy as np import warnings - from numbers import Integral, Real + +import numpy as np from scipy import special, stats from scipy.sparse import issparse -from ..base import BaseEstimator -from ..base import _fit_context +from ..base import BaseEstimator, _fit_context from ..preprocessing import LabelBinarizer -from ..utils import as_float_array, check_array, check_X_y, safe_sqr, safe_mask -from ..utils.extmath import safe_sparse_dot, row_norms -from ..utils.validation import check_is_fitted +from ..utils import as_float_array, check_array, check_X_y, safe_mask, safe_sqr from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.extmath import row_norms, safe_sparse_dot +from ..utils.validation import check_is_fitted from ._base import SelectorMixin diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 073a22c6ad92b..f97c75db1e34b 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -3,12 +3,12 @@ from numbers import Real import numpy as np -from ..base import BaseEstimator -from ..base import _fit_context -from ._base import SelectorMixin + +from ..base import BaseEstimator, _fit_context +from ..utils._param_validation import Interval from ..utils.sparsefuncs import mean_variance_axis, min_max_axis from ..utils.validation import check_is_fitted -from ..utils._param_validation import Interval +from ._base import SelectorMixin class VarianceThreshold(SelectorMixin, BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index 9869a1c03e677..bf883797ddabd 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -1,8 +1,7 @@ import numpy as np import pytest -from scipy import sparse as sp - from numpy.testing import assert_array_equal +from scipy import sparse as sp from sklearn.base import BaseEstimator from sklearn.feature_selection._base import SelectorMixin diff --git 
a/sklearn/feature_selection/tests/test_chi2.py b/sklearn/feature_selection/tests/test_chi2.py index d7d830459e455..4fdc652a998a9 100644 --- a/sklearn/feature_selection/tests/test_chi2.py +++ b/sklearn/feature_selection/tests/test_chi2.py @@ -7,13 +7,12 @@ import numpy as np import pytest -from scipy.sparse import coo_matrix, csr_matrix import scipy.stats +from scipy.sparse import coo_matrix, csr_matrix from sklearn.feature_selection import SelectKBest, chi2 from sklearn.feature_selection._univariate_selection import _chisquare -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal # Feature 0 is highly informative for class 1; # feature 1 is the same everywhere; diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ff51243bb1378..b182aca270e06 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -3,35 +3,36 @@ """ import itertools import warnings -import numpy as np -from numpy.testing import assert_allclose -from scipy import stats, sparse +import numpy as np import pytest +from numpy.testing import assert_allclose +from scipy import sparse, stats -from sklearn.utils._testing import assert_almost_equal, _convert_container -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import safe_mask - -from sklearn.datasets import make_classification, make_regression, load_iris +from sklearn.datasets import load_iris, make_classification, make_regression from sklearn.feature_selection import ( + GenericUnivariateSelect, + SelectFdr, + SelectFpr, + SelectFwe, + SelectKBest, + SelectPercentile, chi2, f_classif, f_oneway, f_regression, - GenericUnivariateSelect, mutual_info_classif, mutual_info_regression, r_regression, - SelectPercentile, - SelectKBest, - SelectFpr, - SelectFdr, - SelectFwe, ) - +from sklearn.utils import safe_mask +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) ############################################################################## # Test the score functions diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 7b408201bc7f5..aa802136c2f39 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,34 +1,36 @@ import re -import pytest -import numpy as np import warnings from unittest.mock import Mock -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import skip_if_32bit -from sklearn.utils._testing import MinimalClassifier +import numpy as np +import pytest from sklearn import datasets +from sklearn.base import BaseEstimator from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression from sklearn.datasets import make_friedman1 +from sklearn.decomposition import PCA +from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier from sklearn.exceptions import NotFittedError +from sklearn.feature_selection import SelectFromModel 
from sklearn.linear_model import ( - LogisticRegression, - SGDClassifier, - Lasso, - LassoCV, ElasticNet, ElasticNetCV, + Lasso, + LassoCV, + LogisticRegression, + PassiveAggressiveClassifier, + SGDClassifier, ) -from sklearn.svm import LinearSVC -from sklearn.feature_selection import SelectFromModel -from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.base import BaseEstimator from sklearn.pipeline import make_pipeline -from sklearn.decomposition import PCA +from sklearn.svm import LinearSVC +from sklearn.utils._testing import ( + MinimalClassifier, + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) class NaNTag(BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index f39e4a5738b21..f7b4af0a393f9 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -2,13 +2,13 @@ import pytest from scipy.sparse import csr_matrix +from sklearn.feature_selection import mutual_info_classif, mutual_info_regression +from sklearn.feature_selection._mutual_info import _compute_mi from sklearn.utils import check_random_state from sklearn.utils._testing import ( - assert_array_equal, assert_allclose, + assert_array_equal, ) -from sklearn.feature_selection._mutual_info import _compute_mi -from sklearn.feature_selection import mutual_info_regression, mutual_info_classif def test_compute_mi_dd(): diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index fa7aeea19be6c..0f141f3461d7f 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -4,31 +4,26 @@ from operator import attrgetter -import pytest import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose +import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal from scipy import sparse from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA -from sklearn.feature_selection import RFE, RFECV +from sklearn.compose import TransformedTargetRegressor +from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression from sklearn.datasets import load_iris, make_friedman1 -from sklearn.metrics import zero_one_loss -from sklearn.svm import SVC, SVR, LinearSVR -from sklearn.linear_model import LogisticRegression from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import GroupKFold -from sklearn.compose import TransformedTargetRegressor +from sklearn.feature_selection import RFE, RFECV +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import get_scorer, make_scorer, zero_one_loss +from sklearn.model_selection import GroupKFold, cross_val_score from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler - +from sklearn.svm import SVC, SVR, LinearSVR from sklearn.utils import check_random_state from sklearn.utils._testing import ignore_warnings -from sklearn.metrics import make_scorer -from sklearn.metrics import get_scorer - class MockClassifier: """ @@ -278,8 +273,8 @@ def test_rfecv_mockclassifier(): def test_rfecv_verbose_output(): # Check verbose=1 is producing 
an output. - from io import StringIO import sys + from io import StringIO sys.stdout = StringIO() diff --git a/sklearn/feature_selection/tests/test_sequential.py b/sklearn/feature_selection/tests/test_sequential.py index a1ea1d4677dd4..a515bf22cdda3 100644 --- a/sklearn/feature_selection/tests/test_sequential.py +++ b/sklearn/feature_selection/tests/test_sequential.py @@ -1,17 +1,17 @@ +import numpy as np import pytest import scipy -import numpy as np from numpy.testing import assert_array_equal -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline +from sklearn.cluster import KMeans +from sklearn.datasets import make_blobs, make_classification, make_regression +from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.feature_selection import SequentialFeatureSelector -from sklearn.datasets import make_regression, make_blobs, make_classification from sklearn.linear_model import LinearRegression -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.model_selection import cross_val_score, LeaveOneGroupOut -from sklearn.cluster import KMeans +from sklearn.model_selection import LeaveOneGroupOut, cross_val_score from sklearn.neighbors import KNeighborsClassifier +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler def test_bad_n_features_to_select(): diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 4bce46556a666..190d016952980 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -1,11 +1,9 @@ import numpy as np import pytest - -from sklearn.utils._testing import assert_array_equal - from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix from sklearn.feature_selection import VarianceThreshold +from sklearn.utils._testing import assert_array_equal data = [[0, 1, 2, 3, 4], [0, 2, 2, 3, 5], [1, 1, 2, 4, 0]] diff --git a/sklearn/gaussian_process/__init__.py b/sklearn/gaussian_process/__init__.py index 719208b7951be..bc0d902b45b18 100644 --- a/sklearn/gaussian_process/__init__.py +++ b/sklearn/gaussian_process/__init__.py @@ -8,9 +8,8 @@ based regression and classification. """ -from ._gpr import GaussianProcessRegressor -from ._gpc import GaussianProcessClassifier from . 
import kernels - +from ._gpc import GaussianProcessClassifier +from ._gpr import GaussianProcessRegressor __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"] diff --git a/sklearn/gaussian_process/_gpc.py b/sklearn/gaussian_process/_gpc.py index 50a8739372972..013815795a853 100644 --- a/sklearn/gaussian_process/_gpc.py +++ b/sklearn/gaussian_process/_gpc.py @@ -8,20 +8,19 @@ from operator import itemgetter import numpy as np -from scipy.linalg import cholesky, cho_solve, solve import scipy.optimize +from scipy.linalg import cho_solve, cholesky, solve from scipy.special import erf, expit -from ..base import BaseEstimator, ClassifierMixin, clone -from ..base import _fit_context -from .kernels import Kernel, RBF, CompoundKernel, ConstantKernel as C -from ..utils.validation import check_is_fitted +from ..base import BaseEstimator, ClassifierMixin, _fit_context, clone +from ..multiclass import OneVsOneClassifier, OneVsRestClassifier +from ..preprocessing import LabelEncoder from ..utils import check_random_state -from ..utils.optimize import _check_optimize_result from ..utils._param_validation import Interval, StrOptions -from ..preprocessing import LabelEncoder -from ..multiclass import OneVsRestClassifier, OneVsOneClassifier - +from ..utils.optimize import _check_optimize_result +from ..utils.validation import check_is_fitted +from .kernels import RBF, CompoundKernel, Kernel +from .kernels import ConstantKernel as C # Values required for approximating the logistic sigmoid by # error functions. coefs are obtained via: diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 49fcab40c25f8..90bbe7e446917 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -9,17 +9,16 @@ from operator import itemgetter import numpy as np -from scipy.linalg import cholesky, cho_solve, solve_triangular import scipy.optimize +from scipy.linalg import cho_solve, cholesky, solve_triangular -from ..base import BaseEstimator, RegressorMixin, clone -from ..base import MultiOutputMixin -from ..base import _fit_context -from .kernels import Kernel, RBF, ConstantKernel as C +from ..base import BaseEstimator, MultiOutputMixin, RegressorMixin, _fit_context, clone from ..preprocessing._data import _handle_zeros_in_scale from ..utils import check_random_state -from ..utils.optimize import _check_optimize_result from ..utils._param_validation import Interval, StrOptions +from ..utils.optimize import _check_optimize_result +from .kernels import RBF, Kernel +from .kernels import ConstantKernel as C GPR_CHOLESKY_LOWER = True diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 1e0866afb6a4d..95db7b13c33ff 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -19,21 +19,20 @@ # Note: this module is strongly inspired by the kernel module of the george # package. 
+import math +import warnings from abc import ABCMeta, abstractmethod from collections import namedtuple -import math from inspect import signature import numpy as np -from scipy.special import kv, gamma -from scipy.spatial.distance import pdist, cdist, squareform +from scipy.spatial.distance import cdist, pdist, squareform +from scipy.special import gamma, kv -from ..metrics.pairwise import pairwise_kernels from ..base import clone -from ..utils.validation import _num_samples from ..exceptions import ConvergenceWarning - -import warnings +from ..metrics.pairwise import pairwise_kernels +from ..utils.validation import _num_samples def _check_length_scale(X, length_scale): diff --git a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py index ad81890680168..4667329aff9b8 100644 --- a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py +++ b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py @@ -1,8 +1,12 @@ -from sklearn.gaussian_process.kernels import Kernel, Hyperparameter -from sklearn.gaussian_process.kernels import GenericKernelMixin -from sklearn.gaussian_process.kernels import StationaryKernelMixin import numpy as np + from sklearn.base import clone +from sklearn.gaussian_process.kernels import ( + GenericKernelMixin, + Hyperparameter, + Kernel, + StationaryKernelMixin, +) class MiniSeqKernel(GenericKernelMixin, StationaryKernelMixin, Kernel): diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index aefdb2e8ff0e2..842159f13ac04 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -4,22 +4,22 @@ # License: BSD 3 clause import warnings -import numpy as np - -from scipy.optimize import approx_fprime +import numpy as np import pytest +from scipy.optimize import approx_fprime +from sklearn.exceptions import ConvergenceWarning from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import ( RBF, CompoundKernel, - ConstantKernel as C, WhiteKernel, ) +from sklearn.gaussian_process.kernels import ( + ConstantKernel as C, +) from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel -from sklearn.exceptions import ConvergenceWarning - from sklearn.utils._testing import assert_almost_equal, assert_array_equal diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 2de35d4659ce6..d890dc05d9f02 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -4,29 +4,31 @@ # Modified by: Pete Green # License: BSD 3 clause -import warnings -import sys import re -import numpy as np - -from scipy.optimize import approx_fprime +import sys +import warnings +import numpy as np import pytest +from scipy.optimize import approx_fprime +from sklearn.exceptions import ConvergenceWarning from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import ( RBF, - ConstantKernel as C, + DotProduct, + ExpSineSquared, WhiteKernel, ) -from sklearn.gaussian_process.kernels import DotProduct, ExpSineSquared +from sklearn.gaussian_process.kernels import ( + ConstantKernel as C, +) from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel -from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ( - assert_array_less, + assert_allclose, assert_almost_equal, assert_array_almost_equal, - 
assert_allclose, + assert_array_less, ) diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py index 56ab9c8b6c2bf..8733f94c94e06 100644 --- a/sklearn/gaussian_process/tests/test_kernels.py +++ b/sklearn/gaussian_process/tests/test_kernels.py @@ -3,40 +3,38 @@ # Author: Jan Hendrik Metzen # License: BSD 3 clause -import pytest -import numpy as np from inspect import signature -from sklearn.gaussian_process.kernels import _approx_fprime +import numpy as np +import pytest -from sklearn.metrics.pairwise import ( - PAIRWISE_KERNEL_FUNCTIONS, - euclidean_distances, - pairwise_kernels, -) +from sklearn.base import clone from sklearn.gaussian_process.kernels import ( RBF, + CompoundKernel, + ConstantKernel, + DotProduct, + Exponentiation, + ExpSineSquared, + KernelOperator, Matern, + PairwiseKernel, RationalQuadratic, - ExpSineSquared, - DotProduct, - ConstantKernel, WhiteKernel, - PairwiseKernel, - KernelOperator, - Exponentiation, - CompoundKernel, + _approx_fprime, +) +from sklearn.metrics.pairwise import ( + PAIRWISE_KERNEL_FUNCTIONS, + euclidean_distances, + pairwise_kernels, ) -from sklearn.base import clone - from sklearn.utils._testing import ( + assert_allclose, assert_almost_equal, - assert_array_equal, assert_array_almost_equal, - assert_allclose, + assert_array_equal, ) - X = np.random.RandomState(0).normal(0, 1, (5, 2)) Y = np.random.RandomState(0).normal(0, 1, (6, 2)) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 37fc43731514a..9245a107adf4f 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -10,17 +10,13 @@ import numpy.ma as ma from scipy import sparse as sp -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context -from ..utils._param_validation import StrOptions, MissingValues +from ..base import BaseEstimator, TransformerMixin, _fit_context +from ..utils import _is_pandas_na, is_scalar_nan +from ..utils._mask import _get_mask +from ..utils._param_validation import MissingValues, StrOptions from ..utils.fixes import _mode from ..utils.sparsefuncs import _get_median -from ..utils.validation import check_is_fitted -from ..utils.validation import FLOAT_DTYPES -from ..utils.validation import _check_feature_names_in -from ..utils._mask import _get_mask -from ..utils import _is_pandas_na -from ..utils import is_scalar_nan +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted def _check_inputs_dtype(X, missing_values): diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index f977e5bc23e6c..a0087a5a10d55 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -1,31 +1,25 @@ -from time import time +import warnings from collections import namedtuple from numbers import Integral, Real -import warnings +from time import time -from scipy import stats import numpy as np +from scipy import stats -from ..base import clone -from ..base import _fit_context +from ..base import _fit_context, clone from ..exceptions import ConvergenceWarning from ..preprocessing import normalize from ..utils import ( + _safe_assign, + _safe_indexing, check_array, check_random_state, is_scalar_nan, - _safe_assign, - _safe_indexing, ) -from ..utils.validation import FLOAT_DTYPES, check_is_fitted -from ..utils.validation import _check_feature_names_in from ..utils._mask import _get_mask from ..utils._param_validation import HasMethods, Interval, StrOptions - -from ._base import _BaseImputer -from ._base import 
SimpleImputer -from ._base import _check_inputs_dtype - +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted +from ._base import SimpleImputer, _BaseImputer, _check_inputs_dtype _ImputerTriplet = namedtuple( "_ImputerTriplet", ["feat_idx", "neighbor_feat_idx", "estimator"] diff --git a/sklearn/impute/_knn.py b/sklearn/impute/_knn.py index 915f8cbdb3fcb..db0da278b39ef 100644 --- a/sklearn/impute/_knn.py +++ b/sklearn/impute/_knn.py @@ -3,19 +3,18 @@ # License: BSD 3 clause from numbers import Integral + import numpy as np -from ._base import _BaseImputer from ..base import _fit_context -from ..utils.validation import FLOAT_DTYPES from ..metrics import pairwise_distances_chunked from ..metrics.pairwise import _NAN_METRICS from ..neighbors._base import _get_weights from ..utils import is_scalar_nan from ..utils._mask import _get_mask -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_feature_names_in from ..utils._param_validation import Hidden, Interval, StrOptions +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted +from ._base import _BaseImputer class KNNImputer(_BaseImputer): diff --git a/sklearn/impute/tests/test_base.py b/sklearn/impute/tests/test_base.py index fedfdebb20a1f..0c1bd83f7ca9e 100644 --- a/sklearn/impute/tests/test_base.py +++ b/sklearn/impute/tests/test_base.py @@ -1,12 +1,10 @@ -import pytest - import numpy as np - -from sklearn.utils._mask import _get_mask -from sklearn.utils._testing import _convert_container, assert_allclose +import pytest from sklearn.impute._base import _BaseImputer from sklearn.impute._iterative import _assign_where +from sklearn.utils._mask import _get_mask +from sklearn.utils._testing import _convert_container, assert_allclose @pytest.fixture diff --git a/sklearn/impute/tests/test_common.py b/sklearn/impute/tests/test_common.py index 00521ca090dc5..aad7eb12a0a92 100644 --- a/sklearn/impute/tests/test_common.py +++ b/sklearn/impute/tests/test_common.py @@ -1,17 +1,14 @@ -import pytest - import numpy as np +import pytest from scipy import sparse -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal - from sklearn.experimental import enable_iterative_imputer # noqa - -from sklearn.impute import IterativeImputer -from sklearn.impute import KNNImputer -from sklearn.impute import SimpleImputer +from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_array_equal, +) def imputers(): diff --git a/sklearn/impute/tests/test_impute.py b/sklearn/impute/tests/test_impute.py index 24b070d21ef06..936847e55e324 100644 --- a/sklearn/impute/tests/test_impute.py +++ b/sklearn/impute/tests/test_impute.py @@ -1,33 +1,31 @@ -import pytest +import io import warnings import numpy as np +import pytest from scipy import sparse from scipy.stats import kstest -import io - -from sklearn.utils._testing import _convert_container -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal +from sklearn import tree +from sklearn.datasets import load_diabetes +from sklearn.dummy import DummyRegressor +from sklearn.exceptions import ConvergenceWarning # make IterativeImputer 
available from sklearn.experimental import enable_iterative_imputer # noqa - -from sklearn.datasets import load_diabetes -from sklearn.impute import MissingIndicator -from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer -from sklearn.dummy import DummyRegressor -from sklearn.linear_model import BayesianRidge, ARDRegression, RidgeCV -from sklearn.pipeline import Pipeline -from sklearn.pipeline import make_union +from sklearn.impute import IterativeImputer, KNNImputer, MissingIndicator, SimpleImputer +from sklearn.impute._base import _most_frequent +from sklearn.linear_model import ARDRegression, BayesianRidge, RidgeCV from sklearn.model_selection import GridSearchCV -from sklearn import tree +from sklearn.pipeline import Pipeline, make_union from sklearn.random_projection import _sparse_random_matrix -from sklearn.exceptions import ConvergenceWarning -from sklearn.impute._base import _most_frequent +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_allclose_dense_sparse, + assert_array_almost_equal, + assert_array_equal, +) def _assert_array_equal_and_same_dtype(x, y): diff --git a/sklearn/impute/tests/test_knn.py b/sklearn/impute/tests/test_knn.py index 80ee1d0c2b574..141c2ea90dbd9 100644 --- a/sklearn/impute/tests/test_knn.py +++ b/sklearn/impute/tests/test_knn.py @@ -3,8 +3,7 @@ from sklearn import config_context from sklearn.impute import KNNImputer -from sklearn.metrics.pairwise import nan_euclidean_distances -from sklearn.metrics.pairwise import pairwise_distances +from sklearn.metrics.pairwise import nan_euclidean_distances, pairwise_distances from sklearn.neighbors import KNeighborsRegressor from sklearn.utils._testing import assert_allclose diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index f73ffe8cff26f..f8e08785e8358 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,13 +1,11 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" +from ._partial_dependence import partial_dependence from ._permutation_importance import permutation_importance from ._plot.decision_boundary import DecisionBoundaryDisplay - -from ._partial_dependence import partial_dependence from ._plot.partial_dependence import PartialDependenceDisplay - __all__ = [ "partial_dependence", "permutation_importance", diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index e3af7dda1e505..59a9212aff440 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -11,18 +11,23 @@ from scipy import sparse from scipy.stats.mstats import mquantiles -from ._pd_utils import _check_feature_names, _get_feature_index from ..base import is_classifier, is_regressor -from ..utils.extmath import cartesian -from ..utils import check_array -from ..utils import check_matplotlib_support # noqa -from ..utils import _safe_indexing -from ..utils import _safe_assign -from ..utils import _determine_key_type -from ..utils import _get_column_indices -from ..utils.validation import _check_sample_weight -from ..utils.validation import check_is_fitted -from ..utils import Bunch +from ..ensemble import RandomForestRegressor +from ..ensemble._gb import BaseGradientBoosting +from ..ensemble._hist_gradient_boosting.gradient_boosting import ( + BaseHistGradientBoosting, +) +from ..exceptions import NotFittedError +from ..tree import DecisionTreeRegressor +from ..utils import ( + Bunch, + _determine_key_type, + 
_get_column_indices, + _safe_assign, + _safe_indexing, + check_array, + check_matplotlib_support, # noqa +) from ..utils._param_validation import ( HasMethods, Integral, @@ -30,14 +35,9 @@ StrOptions, validate_params, ) -from ..tree import DecisionTreeRegressor -from ..ensemble import RandomForestRegressor -from ..exceptions import NotFittedError -from ..ensemble._gb import BaseGradientBoosting -from ..ensemble._hist_gradient_boosting.gradient_boosting import ( - BaseHistGradientBoosting, -) - +from ..utils.extmath import cartesian +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._pd_utils import _check_feature_names, _get_feature_index __all__ = [ "partial_dependence", diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py index 9330589a04794..f8e1fba2967c5 100644 --- a/sklearn/inspection/_permutation_importance.py +++ b/sklearn/inspection/_permutation_importance.py @@ -1,15 +1,13 @@ """Permutation importance for estimators.""" import numbers + import numpy as np from ..ensemble._bagging import _generate_indices from ..metrics import check_scoring, get_scorer_names from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer from ..model_selection._validation import _aggregate_score_dicts -from ..utils import Bunch, _safe_indexing -from ..utils import check_random_state -from ..utils import check_array -from ..utils.parallel import delayed, Parallel +from ..utils import Bunch, _safe_indexing, check_array, check_random_state from ..utils._param_validation import ( HasMethods, Integral, @@ -18,6 +16,7 @@ StrOptions, validate_params, ) +from ..utils.parallel import Parallel, delayed def _weights_scorer(scorer, estimator, X, y, sample_weight): diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 22b4590d9bc3c..e588edbef7626 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -2,14 +2,13 @@ import numpy as np -from ...preprocessing import LabelEncoder -from ...utils import check_matplotlib_support -from ...utils import _safe_indexing from ...base import is_regressor +from ...preprocessing import LabelEncoder +from ...utils import _safe_indexing, check_matplotlib_support from ...utils.validation import ( - check_is_fitted, _is_arraylike_not_scalar, _num_features, + check_is_fitted, ) diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index 48e151cefedbe..46d2c78d78d2e 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -6,16 +6,18 @@ from scipy import sparse from scipy.stats.mstats import mquantiles -from .. import partial_dependence -from .._pd_utils import _check_feature_names, _get_feature_index from ...base import is_regressor -from ...utils import Bunch -from ...utils import check_array -from ...utils import check_matplotlib_support # noqa -from ...utils import check_random_state -from ...utils import _safe_indexing -from ...utils.parallel import delayed, Parallel +from ...utils import ( + Bunch, + _safe_indexing, + check_array, + check_matplotlib_support, # noqa + check_random_state, +) from ...utils._encode import _unique +from ...utils.parallel import Parallel, delayed +from .. 
import partial_dependence +from .._pd_utils import _check_feature_names, _get_feature_index class PartialDependenceDisplay: diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index 73cfe187d7f6e..47c21e4521c35 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -1,21 +1,19 @@ import warnings -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.datasets import make_classification -from sklearn.linear_model import LogisticRegression -from sklearn.datasets import load_iris -from sklearn.datasets import make_multilabel_classification -from sklearn.tree import DecisionTreeRegressor -from sklearn.tree import DecisionTreeClassifier - +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.datasets import ( + load_iris, + make_classification, + make_multilabel_classification, +) from sklearn.inspection import DecisionBoundaryDisplay from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method - +from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py index acda2d001144e..106819b5a25d5 100644 --- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py @@ -1,22 +1,21 @@ import numpy as np -from scipy.stats.mstats import mquantiles - import pytest from numpy.testing import assert_allclose +from scipy.stats.mstats import mquantiles -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification, make_regression -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.linear_model import LinearRegression -from sklearn.utils._testing import _convert_container from sklearn.compose import make_column_transformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.pipeline import make_pipeline - +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor from sklearn.inspection import PartialDependenceDisplay - +from sklearn.linear_model import LinearRegression +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder +from sklearn.utils._testing import _convert_container # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 4e93985f4d02a..0336dc4b827fe 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -7,41 +7,39 @@ import pytest import sklearn +from sklearn.base import BaseEstimator, ClassifierMixin, clone, is_regressor +from sklearn.cluster import KMeans +from sklearn.compose 
import make_column_transformer +from sklearn.datasets import load_iris, make_classification, make_regression +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ( + GradientBoostingClassifier, + GradientBoostingRegressor, + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.exceptions import NotFittedError from sklearn.inspection import partial_dependence from sklearn.inspection._partial_dependence import ( _grid_from_X, _partial_dependence_brute, _partial_dependence_recursion, ) -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import MultiTaskLasso -from sklearn.tree import DecisionTreeRegressor -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification, make_regression -from sklearn.cluster import KMeans -from sklearn.compose import make_column_transformer +from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso from sklearn.metrics import r2_score -from sklearn.preprocessing import PolynomialFeatures -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import RobustScaler -from sklearn.preprocessing import scale from sklearn.pipeline import make_pipeline -from sklearn.dummy import DummyClassifier -from sklearn.base import BaseEstimator, ClassifierMixin, clone -from sklearn.base import is_regressor -from sklearn.exceptions import NotFittedError -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal +from sklearn.preprocessing import ( + PolynomialFeatures, + RobustScaler, + StandardScaler, + scale, +) +from sklearn.tree import DecisionTreeRegressor +from sklearn.tree.tests.test_tree import assert_is_subtree from sklearn.utils import _IS_32BIT +from sklearn.utils._testing import assert_allclose, assert_array_equal from sklearn.utils.validation import check_random_state -from sklearn.tree.tests.test_tree import assert_is_subtree - # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/inspection/tests/test_pd_utils.py b/sklearn/inspection/tests/test_pd_utils.py index 5f461ad498f5b..5dea3834a77a7 100644 --- a/sklearn/inspection/tests/test_pd_utils.py +++ b/sklearn/inspection/tests/test_pd_utils.py @@ -1,9 +1,8 @@ import numpy as np import pytest -from sklearn.utils._testing import _convert_container - from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index +from sklearn.utils._testing import _convert_container @pytest.mark.parametrize( diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py index 307d17188e852..b1a680646afe1 100644 --- a/sklearn/inspection/tests/test_permutation_importance.py +++ b/sklearn/inspection/tests/test_permutation_importance.py @@ -1,31 +1,27 @@ -import pytest import numpy as np - +import pytest from numpy.testing import assert_allclose from sklearn.compose import ColumnTransformer -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression 
+from sklearn.datasets import (
+    load_diabetes,
+    load_iris,
+    make_classification,
+    make_regression,
+)
 from sklearn.dummy import DummyClassifier
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.linear_model import LinearRegression
-from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 from sklearn.impute import SimpleImputer
 from sklearn.inspection import permutation_importance
-from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.metrics import (
     get_scorer,
     mean_squared_error,
     r2_score,
 )
+from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import KBinsDiscretizer
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.preprocessing import StandardScaler
-from sklearn.preprocessing import scale
+from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
 from sklearn.utils import parallel_backend
 from sklearn.utils._testing import _convert_container
diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py
index a1cf95b95591b..4e5f7f7b0034f 100644
--- a/sklearn/isotonic.py
+++ b/sklearn/isotonic.py
@@ -3,20 +3,19 @@
 # Nelle Varoquaux
 # License: BSD 3 clause

+import math
+import warnings
+from numbers import Real
+
 import numpy as np
 from scipy import interpolate
 from scipy.stats import spearmanr
-from numbers import Real
-import warnings
-import math

-from .base import BaseEstimator, TransformerMixin, RegressorMixin
-from .base import _fit_context
+from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique
+from .base import BaseEstimator, RegressorMixin, TransformerMixin, _fit_context
 from .utils import check_array, check_consistent_length
-from .utils.validation import _check_sample_weight, check_is_fitted
 from .utils._param_validation import Interval, StrOptions
-from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique
-
+from .utils.validation import _check_sample_weight, check_is_fitted

 __all__ = ["check_increasing", "isotonic_regression", "IsotonicRegression"]
diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py
index 7f190a2b66823..11e6e91236437 100644
--- a/sklearn/kernel_approximation.py
+++ b/sklearn/kernel_approximation.py
@@ -8,8 +8,8 @@
 # License: BSD 3 clause

-from numbers import Integral, Real
 import warnings
+from numbers import Integral, Real

 import numpy as np
 import scipy.sparse as sp
@@ -20,20 +20,21 @@
 except ImportError:  # scipy < 1.4
     from scipy.fftpack import fft, ifft

-from .base import BaseEstimator
-from .base import TransformerMixin
-from .base import ClassNamePrefixFeaturesOutMixin
-from .base import _fit_context
-from .utils import check_random_state
-from .utils import deprecated
+from .base import (
+    BaseEstimator,
+    ClassNamePrefixFeaturesOutMixin,
+    TransformerMixin,
+    _fit_context,
+)
+from .metrics.pairwise import KERNEL_PARAMS, PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
+from .utils import check_random_state, deprecated
+from .utils._param_validation import Interval, StrOptions
 from .utils.extmath import safe_sparse_dot
-from .utils.validation import check_is_fitted
-from .utils.validation import _check_feature_names_in
-from .metrics.pairwise import pairwise_kernels, KERNEL_PARAMS
-from .utils.validation import check_non_negative
-from .utils._param_validation import Interval
-from .utils._param_validation import StrOptions
-from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS
+from .utils.validation import (
+    _check_feature_names_in,
+    check_is_fitted,
+    check_non_negative,
+)


 class PolynomialCountSketch(
diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py
index a7bfeefaef651..f418c8946510d 100644
--- a/sklearn/kernel_ridge.py
+++ b/sklearn/kernel_ridge.py
@@ -7,12 +7,11 @@

 import numpy as np

-from .base import BaseEstimator, RegressorMixin, MultiOutputMixin
-from .base import _fit_context
-from .utils._param_validation import Interval, StrOptions
-from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
+from .base import BaseEstimator, MultiOutputMixin, RegressorMixin, _fit_context
 from .linear_model._ridge import _solve_cholesky_kernel
-from .utils.validation import check_is_fitted, _check_sample_weight
+from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
+from .utils._param_validation import Interval, StrOptions
+from .utils.validation import _check_sample_weight, check_is_fitted


 class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):
diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py
index d5a14756c41a9..45c99d4d36df1 100644
--- a/sklearn/linear_model/__init__.py
+++ b/sklearn/linear_model/__init__.py
@@ -7,46 +7,44 @@
 # complete documentation.

 from ._base import LinearRegression
-from ._bayes import BayesianRidge, ARDRegression
-from ._least_angle import (
-    Lars,
-    LassoLars,
-    lars_path,
-    lars_path_gram,
-    LarsCV,
-    LassoLarsCV,
-    LassoLarsIC,
-)
+from ._bayes import ARDRegression, BayesianRidge
 from ._coordinate_descent import (
-    Lasso,
     ElasticNet,
-    LassoCV,
     ElasticNetCV,
-    lasso_path,
-    enet_path,
-    MultiTaskLasso,
+    Lasso,
+    LassoCV,
     MultiTaskElasticNet,
     MultiTaskElasticNetCV,
+    MultiTaskLasso,
     MultiTaskLassoCV,
+    enet_path,
+    lasso_path,
 )
-from ._glm import PoissonRegressor, GammaRegressor, TweedieRegressor
+from ._glm import GammaRegressor, PoissonRegressor, TweedieRegressor
 from ._huber import HuberRegressor
-from ._sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
-from ._stochastic_gradient import SGDClassifier, SGDRegressor, SGDOneClassSVM
-from ._ridge import Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV, ridge_regression
+from ._least_angle import (
+    Lars,
+    LarsCV,
+    LassoLars,
+    LassoLarsCV,
+    LassoLarsIC,
+    lars_path,
+    lars_path_gram,
+)
 from ._logistic import LogisticRegression, LogisticRegressionCV
 from ._omp import (
-    orthogonal_mp,
-    orthogonal_mp_gram,
     OrthogonalMatchingPursuit,
     OrthogonalMatchingPursuitCV,
+    orthogonal_mp,
+    orthogonal_mp_gram,
 )
-from ._passive_aggressive import PassiveAggressiveClassifier
-from ._passive_aggressive import PassiveAggressiveRegressor
+from ._passive_aggressive import PassiveAggressiveClassifier, PassiveAggressiveRegressor
 from ._perceptron import Perceptron
-
 from ._quantile import QuantileRegressor
 from ._ransac import RANSACRegressor
+from ._ridge import Ridge, RidgeClassifier, RidgeClassifierCV, RidgeCV, ridge_regression
+from ._sgd_fast import Hinge, Huber, Log, ModifiedHuber, SquaredLoss
+from ._stochastic_gradient import SGDClassifier, SGDOneClassSVM, SGDRegressor
 from ._theil_sen import TheilSenRegressor

 __all__ = [
diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 92c067c850225..249c13da179c0 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -14,33 +14,37 @@
 # Maria Telenczuk
 # License: BSD 3 clause

-from abc import ABCMeta, abstractmethod
 import numbers
 import warnings
+from abc import ABCMeta, abstractmethod
+from numbers import Integral

 import numpy as np
 import scipy.sparse as sp
-from scipy import linalg
-from scipy import optimize
-from scipy import sparse
+from scipy import linalg, optimize, sparse
 from scipy.sparse.linalg import lsqr
 from scipy.special import expit
-from numbers import Integral

-from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin
-from ..base import _fit_context
+from ..base import (
+    BaseEstimator,
+    ClassifierMixin,
+    MultiOutputMixin,
+    RegressorMixin,
+    _fit_context,
+)
 from ..preprocessing._data import _is_constant_feature
-from ..utils import check_array
-from ..utils.validation import FLOAT_DTYPES
-from ..utils import check_random_state
-from ..utils.extmath import safe_sparse_dot
-from ..utils.extmath import _incremental_mean_and_var
-from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
+from ..utils import check_array, check_random_state
 from ..utils._array_api import get_namespace
-from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
-from ..utils._seq_dataset import ArrayDataset64, CSRDataset64
-from ..utils.validation import check_is_fitted, _check_sample_weight
-from ..utils.parallel import delayed, Parallel
+from ..utils._seq_dataset import (
+    ArrayDataset32,
+    ArrayDataset64,
+    CSRDataset32,
+    CSRDataset64,
+)
+from ..utils.extmath import _incremental_mean_and_var, safe_sparse_dot
+from ..utils.parallel import Parallel, delayed
+from ..utils.sparsefuncs import inplace_column_scale, mean_variance_axis
+from ..utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted

 # TODO: bayesian_ridge_regression and bayesian_regression_ard
 # should be squashed into its respective objects.
diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py
index 37dc3b81511f5..7b64e91f18c17 100644
--- a/sklearn/linear_model/_bayes.py
+++ b/sklearn/linear_model/_bayes.py
@@ -8,16 +8,16 @@
 import warnings
 from math import log
 from numbers import Integral, Real
+
 import numpy as np
 from scipy import linalg
+from scipy.linalg import pinvh

-from ._base import LinearModel, _preprocess_data, _rescale_data
-from ..base import RegressorMixin
-from ..base import _fit_context
+from ..base import RegressorMixin, _fit_context
+from ..utils._param_validation import Hidden, Interval, StrOptions
 from ..utils.extmath import fast_logdet
-from scipy.linalg import pinvh
 from ..utils.validation import _check_sample_weight
-from ..utils._param_validation import Interval, Hidden, StrOptions
+from ._base import LinearModel, _preprocess_data, _rescale_data


 # TODO(1.5) Remove
diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index 829c0ab6149f1..c7caeab2090fe 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -5,36 +5,34 @@
 #
 # License: BSD 3 clause

+import numbers
 import sys
 import warnings
-import numbers
 from abc import ABC, abstractmethod
 from functools import partial
 from numbers import Integral, Real

 import numpy as np
-from scipy import sparse
 from joblib import effective_n_jobs
+from scipy import sparse

-from ._base import LinearModel, _pre_fit
-from ..base import RegressorMixin, MultiOutputMixin
-from ..base import _fit_context
-from ._base import _preprocess_data
+from ..base import MultiOutputMixin, RegressorMixin, _fit_context
+from ..model_selection import check_cv
 from ..utils import check_array, check_scalar
-from ..utils.validation import check_random_state
 from ..utils._param_validation import Interval, StrOptions
-from ..model_selection import check_cv
 from ..utils.extmath import safe_sparse_dot
+from ..utils.parallel import Parallel, delayed
 from ..utils.validation import (
     _check_sample_weight,
     check_consistent_length,
     check_is_fitted,
+    check_random_state,
     column_or_1d,
 )
-from ..utils.parallel import delayed, Parallel

 # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast'
 from . import _cd_fast as cd_fast  # type: ignore
+from ._base import LinearModel, _pre_fit, _preprocess_data


 def _set_order(X, y, order="C"):
diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py
index fea9c4d4cf6ba..1b82bbd77bcf9 100644
--- a/sklearn/linear_model/_glm/__init__.py
+++ b/sklearn/linear_model/_glm/__init__.py
@@ -1,10 +1,10 @@
 # License: BSD 3 clause

 from .glm import (
-    _GeneralizedLinearRegressor,
-    PoissonRegressor,
     GammaRegressor,
+    PoissonRegressor,
     TweedieRegressor,
+    _GeneralizedLinearRegressor,
 )

 __all__ = [
diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py
index b1bc460f24dff..3dc0bbdc66bff 100644
--- a/sklearn/linear_model/_glm/glm.py
+++ b/sklearn/linear_model/_glm/glm.py
@@ -11,7 +11,6 @@
 import numpy as np
 import scipy.optimize

-from ._newton_solver import NewtonCholeskySolver, NewtonSolver
 from ..._loss.loss import (
     HalfGammaLoss,
     HalfPoissonLoss,
@@ -19,14 +18,14 @@
     HalfTweedieLoss,
     HalfTweedieLossIdentity,
 )
-from ...base import BaseEstimator, RegressorMixin
-from ...base import _fit_context
+from ...base import BaseEstimator, RegressorMixin, _fit_context
 from ...utils import check_array
 from ...utils._openmp_helpers import _openmp_effective_n_threads
 from ...utils._param_validation import Hidden, Interval, StrOptions
 from ...utils.optimize import _check_optimize_result
 from ...utils.validation import _check_sample_weight, check_is_fitted
 from .._linear_loss import LinearModelLoss
+from ._newton_solver import NewtonCholeskySolver, NewtonSolver


 class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):
diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py
index c92ef5f99ca8a..5256a5f370272 100644
--- a/sklearn/linear_model/_glm/tests/test_glm.py
+++ b/sklearn/linear_model/_glm/tests/test_glm.py
@@ -2,22 +2,22 @@
 #
 # License: BSD 3 clause

-from functools import partial
 import itertools
 import warnings
+from functools import partial

 import numpy as np
-from numpy.testing import assert_allclose
 import pytest
 import scipy
+from numpy.testing import assert_allclose
 from scipy import linalg
 from scipy.optimize import minimize, root

-from sklearn.base import clone
 from sklearn._loss import HalfBinomialLoss, HalfPoissonLoss, HalfTweedieLoss
 from sklearn._loss.link import IdentityLink, LogLink
-
+from sklearn.base import clone
 from sklearn.datasets import make_low_rank_matrix, make_regression
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import (
     GammaRegressor,
     PoissonRegressor,
@@ -27,11 +27,9 @@
 from sklearn.linear_model._glm import _GeneralizedLinearRegressor
 from sklearn.linear_model._glm._newton_solver import NewtonCholeskySolver
 from sklearn.linear_model._linear_loss import LinearModelLoss
-from sklearn.exceptions import ConvergenceWarning
 from sklearn.metrics import d2_tweedie_score, mean_poisson_deviance
 from sklearn.model_selection import train_test_split
-

 SOLVERS = ["lbfgs", "newton-cholesky"]
diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py
index def2ae273d5c4..554f693061116 100644
--- a/sklearn/linear_model/_huber.py
+++ b/sklearn/linear_model/_huber.py
@@ -2,18 +2,17 @@
 # License: BSD 3 clause

 from numbers import Integral, Real
-import numpy as np

+import numpy as np
 from scipy import optimize

-from ..base import BaseEstimator, RegressorMixin
-from ..base import _fit_context
-from ._base import LinearModel
+from ..base import BaseEstimator, RegressorMixin, _fit_context
 from ..utils import axis0_safe_slice
 from ..utils._param_validation import Interval
-from ..utils.validation import _check_sample_weight
 from ..utils.extmath import safe_sparse_dot
 from ..utils.optimize import _check_optimize_result
+from ..utils.validation import _check_sample_weight
+from ._base import LinearModel


 def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py
index e6c653eb80bb3..439ba636d159f 100644
--- a/sklearn/linear_model/_least_angle.py
+++ b/sklearn/linear_model/_least_angle.py
@@ -8,27 +8,24 @@
 #
 # License: BSD 3 clause

-from math import log
 import sys
 import warnings
-
+from math import log
 from numbers import Integral, Real
+
 import numpy as np
-from scipy import linalg, interpolate
+from scipy import interpolate, linalg
 from scipy.linalg.lapack import get_lapack_funcs

-from ._base import LinearModel, LinearRegression
-from ._base import _deprecate_normalize, _preprocess_data
-from ..base import RegressorMixin, MultiOutputMixin
-from ..base import _fit_context
+from ..base import MultiOutputMixin, RegressorMixin, _fit_context
+from ..exceptions import ConvergenceWarning
+from ..model_selection import check_cv

 # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs'
-from ..utils import arrayfuncs, as_float_array  # type: ignore
-from ..utils import check_random_state
+from ..utils import arrayfuncs, as_float_array, check_random_state  # type: ignore
 from ..utils._param_validation import Hidden, Interval, StrOptions
-from ..model_selection import check_cv
-from ..exceptions import ConvergenceWarning
-from ..utils.parallel import delayed, Parallel
+from ..utils.parallel import Parallel, delayed
+from ._base import LinearModel, LinearRegression, _deprecate_normalize, _preprocess_data

 SOLVE_TRIANGULAR_ARGS = {"check_finite": False}
diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py
index f70d78fb42871..92a203abc87ab 100644
--- a/sklearn/linear_model/_linear_loss.py
+++ b/sklearn/linear_model/_linear_loss.py
@@ -3,6 +3,7 @@
 """
 import numpy as np
 from scipy import sparse
+
 from ..utils.extmath import squared_norm
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index 30a0f40a0f2fd..6bdc4b7368ef0 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -11,35 +11,37 @@
 # Arthur Mensch