From 9705422f6bd590297b7e30e37ced83407a3c1fb0 Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Fri, 13 May 2022 13:17:17 +0200
Subject: [PATCH 01/10] MNT add isort to pre-commit hooks

---
 .pre-commit-config.yaml | 4 ++++
 pyproject.toml          | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6519a849852fc..0f1caa607d663 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,3 +20,7 @@ repos:
   - id: mypy
     files: sklearn/
     additional_dependencies: [pytest==6.2.4]
+- repo: https://github.com/PyCQA/isort
+  rev: 5.10.1
+  hooks:
+  - id: isort
diff --git a/pyproject.toml b/pyproject.toml
index 9b38a78966358..ce4a37113e528 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,3 +33,6 @@ exclude = '''
   | asv_benchmarks/env
 )/
 '''
+
+[tool.isort]
+profile = "black"
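
[Editor's note, not part of the patch] PATCH 01 pins isort 5.10.1 as a pre-commit hook and, via the `[tool.isort]` table, selects the black profile so that isort's import ordering and line wrapping stay compatible with Black. A minimal sketch of what that configuration does, using isort's Python API (`isort.code` is the public entry point in isort 5.x; the shuffled sample imports are hypothetical, not taken from the repo):

import isort

# A deliberately shuffled import block (hypothetical sample).
messy = (
    "from sklearn.linear_model import LogisticRegression\n"
    "import os\n"
    "import numpy as np\n"
    "import json\n"
)

# profile="black" mirrors the [tool.isort] setting added above.
print(isort.code(messy, profile="black"))
# The standard-library names (json, os) come out first, the remaining
# packages follow in later sections, and each section is alphabetized.

PATCH 02 below applies that profile across the whole repository.
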
From 6f968645df7844cf73d1cdf81818b68a358ce95e Mon Sep 17 00:00:00 2001
From: adrinjalali
Date: Fri, 13 May 2022 14:12:12 +0200
Subject: [PATCH 02/10] apply isort to the repo

---
 .github/scripts/label_title_regex.py | 5 +-
 asv_benchmarks/benchmarks/cluster.py | 2 +-
 asv_benchmarks/benchmarks/common.py | 8 +-
 asv_benchmarks/benchmarks/datasets.py | 15 +-
 asv_benchmarks/benchmarks/decomposition.py | 4 +-
 asv_benchmarks/benchmarks/ensemble.py | 2 +-
 asv_benchmarks/benchmarks/linear_model.py | 4 +-
 benchmarks/bench_20newsgroups.py | 19 +-
 benchmarks/bench_covertype.py | 18 +-
 benchmarks/bench_feature_expansions.py | 4 +-
 benchmarks/bench_glm.py | 3 +-
 benchmarks/bench_glmnet.py | 10 +-
 benchmarks/bench_hist_gradient_boosting.py | 15 +-
 .../bench_hist_gradient_boosting_adult.py | 7 +-
 ...hist_gradient_boosting_categorical_only.py | 3 +-
 ...bench_hist_gradient_boosting_higgsboson.py | 10 +-
 .../bench_hist_gradient_boosting_threading.py | 17 +-
 benchmarks/bench_isolation_forest.py | 7 +-
 benchmarks/bench_isotonic.py | 10 +-
 ...kernel_pca_solvers_time_vs_n_components.py | 7 +-
 ...ch_kernel_pca_solvers_time_vs_n_samples.py | 7 +-
 benchmarks/bench_lasso.py | 4 +-
 benchmarks/bench_lof.py | 8 +-
 benchmarks/bench_mnist.py | 18 +-
 benchmarks/bench_multilabel_metrics.py | 11 +-
 benchmarks/bench_online_ocsvm.py | 19 +-
 benchmarks/bench_plot_fastkmeans.py | 2 +-
 benchmarks/bench_plot_incremental_pca.py | 8 +-
 benchmarks/bench_plot_lasso_path.py | 7 +-
 benchmarks/bench_plot_neighbors.py | 4 +-
 benchmarks/bench_plot_nmf.py | 19 +-
 benchmarks/bench_plot_omp_lars.py | 2 +-
 benchmarks/bench_plot_parallel_pairwise.py | 3 +-
 ...ch_plot_polynomial_kernel_approximation.py | 16 +-
 benchmarks/bench_plot_randomized_svd.py | 27 +-
 benchmarks/bench_plot_svd.py | 9 +-
 benchmarks/bench_plot_ward.py | 2 +-
 benchmarks/bench_random_projections.py | 6 +-
 benchmarks/bench_rcv1_logreg_convergence.py | 9 +-
 benchmarks/bench_saga.py | 10 +-
 .../bench_sample_without_replacement.py | 6 +-
 benchmarks/bench_sgd_regression.py | 11 +-
 benchmarks/bench_sparsify.py | 3 +-
 benchmarks/bench_text_vectorizers.py | 4 +-
 benchmarks/bench_tree.py | 5 +-
 benchmarks/bench_tsne_mnist.py | 9 +-
 benchmarks/plot_tsne_mnist.py | 7 +-
 build_tools/circle/list_versions.py | 1 -
 build_tools/generate_authors_table.py | 7 +-
 build_tools/github/check_wheels.py | 5 +-
 build_tools/github/vendor.py | 1 -
 doc/conf.py | 13 +-
 doc/conftest.py | 12 +-
 doc/sphinxext/allow_nan_estimators.py | 13 +-
 doc/sphinxext/doi_role.py | 1 -
 doc/sphinxext/github_link.py | 4 +-
 .../machine_learning_map/parse_path.py | 192 +-
 .../machine_learning_map/pyparsing.py | 3437 ++++++++++-------
 .../data/languages/fetch_data.py | 67 +-
 .../data/movie_reviews/fetch_data.py | 10 +-
 .../exercise_01_language_train_model.py | 35 +-
 .../skeletons/exercise_02_sentiment.py | 25 +-
 .../exercise_01_language_train_model.py | 42 +-
 .../solutions/exercise_02_sentiment.py | 48 +-
 .../plot_cyclical_feature_engineering.py | 24 +-
 .../applications/plot_digits_denoising.py | 3 +-
 .../applications/plot_face_recognition.py | 10 +-
 .../plot_model_complexity_influence.py | 10 +-
 .../plot_out_of_core_classification.py | 14 +-
 .../plot_outlier_detection_wine.py | 7 +-
 .../applications/plot_prediction_latency.py | 13 +-
 .../plot_species_distribution_modeling.py | 6 +-
 examples/applications/plot_stock_market.py | 1 +
 .../plot_tomography_l1_reconstruction.py | 9 +-
 .../plot_topics_extraction_with_nmf_lda.py | 5 +-
 examples/applications/svm_gui.py | 7 +-
 .../wikipedia_principal_eigenvector.py | 6 +-
 .../bicluster/plot_bicluster_newsgroups.py | 5 +-
 .../bicluster/plot_spectral_biclustering.py | 3 +-
 .../bicluster/plot_spectral_coclustering.py | 2 +-
 examples/calibration/plot_calibration.py | 3 +-
 .../calibration/plot_calibration_curve.py | 6 +-
 .../plot_calibration_multiclass.py | 1 +
 .../plot_classification_probability.py | 8 +-
 .../plot_classifier_comparison.py | 21 +-
 .../plot_digits_classification.py | 2 +-
 examples/classification/plot_lda.py | 5 +-
 examples/classification/plot_lda_qda.py | 8 +-
 .../plot_adjusted_for_chance_measures.py | 6 +-
 examples/cluster/plot_affinity_propagation.py | 5 +-
 .../cluster/plot_agglomerative_clustering.py | 1 +
 .../cluster/plot_agglomerative_dendrogram.py | 4 +-
 .../cluster/plot_birch_vs_minibatchkmeans.py | 8 +-
 examples/cluster/plot_bisect_kmeans.py | 3 +-
 examples/cluster/plot_cluster_comparison.py | 4 +-
 examples/cluster/plot_cluster_iris.py | 4 +-
 examples/cluster/plot_coin_segmentation.py | 5 +-
 examples/cluster/plot_color_quantization.py | 8 +-
 examples/cluster/plot_dbscan.py | 3 +-
 examples/cluster/plot_digits_agglomeration.py | 4 +-
 examples/cluster/plot_digits_linkage.py | 2 +-
 examples/cluster/plot_face_compress.py | 3 +-
 ...e_agglomeration_vs_univariate_selection.py | 9 +-
 examples/cluster/plot_inductive_clustering.py | 2 +-
 examples/cluster/plot_kmeans_assumptions.py | 2 +-
 examples/cluster/plot_kmeans_digits.py | 2 +
 examples/cluster/plot_kmeans_plusplus.py | 3 +-
 .../plot_kmeans_silhouette_analysis.py | 10 +-
 .../plot_kmeans_stability_low_dim_dense.py | 10 +-
 examples/cluster/plot_linkage_comparison.py | 4 +-
 examples/cluster/plot_mean_shift.py | 4 +-
 examples/cluster/plot_mini_batch_kmeans.py | 2 +
 examples/cluster/plot_optics.py | 3 +-
 examples/cluster/plot_segmentation_toy.py | 3 +-
 .../plot_ward_structured_vs_unstructured.py | 10 +-
 examples/compose/plot_column_transformer.py | 4 +-
 .../plot_column_transformer_mixed_types.py | 6 +-
 examples/compose/plot_compare_reduction.py | 10 +-
 examples/compose/plot_digits_pipe.py | 4 +-
 examples/compose/plot_feature_union.py | 6 +-
 examples/compose/plot_transformed_target.py | 6 +-
 .../covariance/plot_covariance_estimation.py | 5 +-
 examples/covariance/plot_lw_vs_oas.py | 6 +-
 .../covariance/plot_mahalanobis_distances.py | 1 +
 .../plot_robust_vs_empirical_covariance.py | 4 +-
 examples/covariance/plot_sparse_cov.py | 1 +
 .../plot_compare_cross_decomposition.py | 5 +-
 .../cross_decomposition/plot_pcr_vs_pls.py | 11 +-
 examples/datasets/plot_digits_last_image.py | 4 +-
 examples/datasets/plot_random_dataset.py | 4 +-
 .../plot_random_multilabel_dataset.py | 2 +-
 .../decomposition/plot_beta_divergence.py | 3 +-
 .../decomposition/plot_faces_decomposition.py | 5 +-
 .../plot_ica_blind_source_separation.py | 4 +-
 .../decomposition/plot_image_denoising.py | 1 -
 .../decomposition/plot_incremental_pca.py | 2 +-
 examples/decomposition/plot_pca_3d.py | 5 +-
 examples/decomposition/plot_pca_iris.py | 6 +-
 .../plot_pca_vs_fa_model_selection.py | 6 +-
 examples/decomposition/plot_sparse_coding.py | 2 +-
 examples/decomposition/plot_varimax_fa.py | 4 +-
 .../ensemble/plot_adaboost_hastie_10_2.py | 1 +
 examples/ensemble/plot_adaboost_multiclass.py | 1 -
 examples/ensemble/plot_adaboost_regression.py | 6 +-
 examples/ensemble/plot_adaboost_twoclass.py | 7 +-
 examples/ensemble/plot_bias_variance.py | 2 +-
 examples/ensemble/plot_ensemble_oob.py | 3 +-
 .../ensemble/plot_feature_transformation.py | 6 +-
 examples/ensemble/plot_forest_importances.py | 1 +
 .../ensemble/plot_forest_importances_faces.py | 1 +
 examples/ensemble/plot_forest_iris.py | 6 +-
 .../plot_gradient_boosting_categorical.py | 9 +-
 .../plot_gradient_boosting_early_stopping.py | 5 +-
 .../ensemble/plot_gradient_boosting_oob.py | 8 +-
 .../plot_gradient_boosting_quantile.py | 8 +-
 .../plot_gradient_boosting_regression.py | 1 +
 .../plot_gradient_boosting_regularization.py | 6 +-
 examples/ensemble/plot_isolation_forest.py | 3 +-
 .../ensemble/plot_monotonic_constraints.py | 6 +-
 .../ensemble/plot_random_forest_embedding.py | 4 +-
 ...ot_random_forest_regression_multioutput.py | 4 +-
 examples/ensemble/plot_stack_predictors.py | 7 +-
 .../ensemble/plot_voting_decision_regions.py | 6 +-
 examples/ensemble/plot_voting_probas.py | 5 +-
 examples/ensemble/plot_voting_regressor.py | 8 +-
 examples/exercises/plot_cv_digits.py | 3 +-
 .../plot_digits_classification_exercise.py | 2 +-
 examples/exercises/plot_iris_exercise.py | 3 +-
 .../feature_selection/plot_f_test_vs_mi.py | 3 +-
 .../plot_feature_selection.py | 1 +
 examples/feature_selection/plot_rfe_digits.py | 5 +-
 .../plot_rfe_with_cross_validation.py | 7 +-
 .../plot_select_from_model_diabetes.py | 4 +-
 .../gaussian_process/plot_compare_gpr_krr.py | 1 +
 examples/gaussian_process/plot_gpc.py | 4 +-
 examples/gaussian_process/plot_gpc_iris.py | 3 +-
 .../plot_gpc_isoprobability.py | 6 +-
 examples/gaussian_process/plot_gpc_xor.py | 3 +-
 examples/gaussian_process/plot_gpr_co2.py | 1 +
 .../plot_gpr_on_structured_data.py | 9 +-
 ...t_iterative_imputer_variants_comparison.py | 14 +-
 examples/impute/plot_missing_values.py | 8 +-
 ...linear_model_coefficient_interpretation.py | 15 +-
 .../inspection/plot_partial_dependence.py | 10 +-
 .../inspection/plot_permutation_importance.py | 3 +-
 ...t_permutation_importance_multicollinear.py | 2 +-
 .../plot_scalable_poly_kernels.py | 3 +-
 examples/linear_model/plot_ard.py | 3 +-
 ...puted_gram_matrix_with_weighted_samples.py | 1 +
 examples/linear_model/plot_huber_vs_ridge.py | 2 +-
 examples/linear_model/plot_iris_logistic.py | 3 +-
 .../linear_model/plot_lasso_and_elasticnet.py | 2 +-
 .../plot_lasso_coordinate_descent_path.py | 6 +-
 .../plot_lasso_dense_vs_sparse_data.py | 5 +-
 examples/linear_model/plot_lasso_lars.py | 5 +-
 examples/linear_model/plot_lasso_lars_ic.py | 5 +-
 .../plot_lasso_model_selection.py | 3 +-
 examples/linear_model/plot_logistic.py | 6 +-
 .../plot_logistic_l1_l2_sparsity.py | 4 +-
 .../linear_model/plot_logistic_multinomial.py | 5 +-
 .../plot_multi_task_lasso_support.py | 2 +-
 examples/linear_model/plot_nnls.py | 3 +-
 examples/linear_model/plot_ols.py | 1 +
 examples/linear_model/plot_ols_3d.py | 3 +-
 .../linear_model/plot_ols_ridge_variance.py | 2 +-
 examples/linear_model/plot_omp.py | 4 +-
 ...plot_poisson_regression_non_normal_loss.py | 28 +-
 .../plot_polynomial_interpolation.py | 5 +-
 .../linear_model/plot_quantile_regression.py | 3 +-
 examples/linear_model/plot_ransac.py | 3 +-
 examples/linear_model/plot_ridge_path.py | 3 +-
 examples/linear_model/plot_robust_fit.py | 8 +-
 examples/linear_model/plot_sgd_comparison.py | 13 +-
 .../linear_model/plot_sgd_early_stopping.py | 10 +-
 examples/linear_model/plot_sgd_iris.py | 5 +-
 .../linear_model/plot_sgd_loss_functions.py | 2 +-
 examples/linear_model/plot_sgd_penalties.py | 2 +-
 .../plot_sgd_separating_hyperplane.py | 5 +-
 .../linear_model/plot_sgd_weighted_samples.py | 3 +-
 .../linear_model/plot_sgdocsvm_vs_ocsvm.py | 9 +-
 ...sparse_logistic_regression_20newsgroups.py | 2 +-
 .../plot_sparse_logistic_regression_mnist.py | 1 +
 examples/linear_model/plot_theilsen.py | 7 +-
 ...lot_tweedie_regression_insurance_claims.py | 28 +-
 examples/manifold/plot_compare_methods.py | 6 +-
 examples/manifold/plot_lle_digits.py | 5 +-
 examples/manifold/plot_manifold_sphere.py | 12 +-
 examples/manifold/plot_mds.py | 3 +-
 examples/manifold/plot_swissroll.py | 2 +-
 examples/manifold/plot_t_sne_perplexity.py | 9 +-
 .../miscellaneous/plot_anomaly_comparison.py | 8 +-
 .../plot_changed_only_pprint_parameter.py | 3 +-
 .../plot_display_object_visualization.py | 13 +-
 .../miscellaneous/plot_isotonic_regression.py | 4 +-
 .../plot_johnson_lindenstrauss_bound.py | 13 +-
 .../plot_kernel_approximation.py | 7 +-
 .../plot_kernel_ridge_regression.py | 2 +-
 examples/miscellaneous/plot_multilabel.py | 6 +-
 .../plot_multioutput_face_completion.py | 8 +-
 .../plot_outlier_detection_bench.py | 9 +-
 ...ot_partial_dependence_visualization_api.py | 8 +-
 .../miscellaneous/plot_pipeline_display.py | 26 +-
 .../plot_roc_curve_visualization_api.py | 5 +-
 examples/mixture/plot_concentration_prior.py | 4 +-
 examples/mixture/plot_gmm.py | 4 +-
 examples/mixture/plot_gmm_covariances.py | 1 -
 examples/mixture/plot_gmm_init.py | 6 +-
 examples/mixture/plot_gmm_pdf.py | 3 +-
 examples/mixture/plot_gmm_selection.py | 6 +-
 examples/mixture/plot_gmm_sin.py | 4 +-
 .../grid_search_text_feature_extraction.py | 5 +-
 .../model_selection/plot_confusion_matrix.py | 6 +-
 examples/model_selection/plot_cv_indices.py | 15 +-
 examples/model_selection/plot_cv_predict.py | 6 +-
 .../plot_grid_search_digits.py | 3 +-
 .../plot_grid_search_refit_callable.py | 2 +-
 .../model_selection/plot_grid_search_stats.py | 1 +
 .../model_selection/plot_learning_curve.py | 8 +-
 .../plot_multi_metric_evaluation.py | 3 +-
 .../plot_nested_cross_validation_iris.py | 7 +-
 ...ot_permutation_tests_for_classification.py | 3 +-
 .../model_selection/plot_precision_recall.py | 7 +-
 .../model_selection/plot_randomized_search.py | 8 +-
 examples/model_selection/plot_roc.py | 12 +-
 examples/model_selection/plot_roc_crossval.py | 3 +-
 .../plot_successive_halving_heatmap.py | 6 +-
 .../plot_successive_halving_iterations.py | 9 +-
 .../plot_train_error_vs_test_error.py | 1 +
 .../plot_underfitting_overfitting.py | 7 +-
 .../model_selection/plot_validation_curve.py | 2 +-
 .../plot_classifier_chain_yeast.py | 9 +-
 .../approximate_nearest_neighbors.py | 11 +-
 .../plot_caching_nearest_neighbors.py | 5 +-
 examples/neighbors/plot_classification.py | 3 +-
 .../neighbors/plot_digits_kde_sampling.py | 4 +-
 examples/neighbors/plot_kde_1d.py | 4 +-
 .../neighbors/plot_lof_novelty_detection.py | 3 +-
 .../neighbors/plot_lof_outlier_detection.py | 3 +-
 examples/neighbors/plot_nca_classification.py | 6 +-
 examples/neighbors/plot_nca_dim_reduction.py | 5 +-
 examples/neighbors/plot_nca_illustration.py | 7 +-
 examples/neighbors/plot_nearest_centroid.py | 5 +-
 examples/neighbors/plot_regression.py | 4 +-
 examples/neighbors/plot_species_kde.py | 3 +-
 examples/neural_networks/plot_mlp_alpha.py | 5 +-
 .../plot_mlp_training_curves.py | 4 +-
 .../neural_networks/plot_mnist_filters.py | 4 +-
 .../plot_rbm_logistic_classification.py | 4 +-
 examples/preprocessing/plot_all_scaling.py | 24 +-
 examples/preprocessing/plot_discretization.py | 2 +-
 .../plot_discretization_classification.py | 15 +-
 .../plot_discretization_strategies.py | 4 +-
 .../preprocessing/plot_map_data_to_normal.py | 6 +-
 .../preprocessing/plot_scaling_importance.py | 8 +-
 .../plot_release_highlights_0_22_0.py | 25 +-
 .../plot_release_highlights_0_23_0.py | 27 +-
 .../plot_release_highlights_0_24_0.py | 21 +-
 .../plot_release_highlights_1_0_0.py | 7 +-
 .../plot_release_highlights_1_1_0.py | 23 +-
 .../plot_label_propagation_digits.py | 6 +-
 ...abel_propagation_digits_active_learning.py | 4 +-
 .../plot_label_propagation_structure.py | 1 +
 .../plot_self_training_varying_threshold.py | 7 +-
 .../plot_semi_supervised_newsgroups.py | 10 +-
 .../plot_semi_supervised_versus_svm_iris.py | 7 +-
 examples/svm/plot_custom_kernel.py | 5 +-
 examples/svm/plot_iris_svc.py | 4 +-
 .../svm/plot_linearsvc_support_vectors.py | 5 +-
 examples/svm/plot_oneclass.py | 5 +-
 examples/svm/plot_rbf_parameters.py | 3 +-
 examples/svm/plot_separating_hyperplane.py | 2 +-
 .../plot_separating_hyperplane_unbalanced.py | 1 +
 examples/svm/plot_svm_anova.py | 5 +-
 examples/svm/plot_svm_kernels.py | 4 +-
 examples/svm/plot_svm_margin.py | 3 +-
 examples/svm/plot_svm_nonlinear.py | 3 +-
 examples/svm/plot_svm_regression.py | 3 +-
 examples/svm/plot_svm_scale_c.py | 7 +-
 examples/svm/plot_svm_tie_breaking.py | 5 +-
 examples/svm/plot_weighted_samples.py | 3 +-
 ...ot_document_classification_20newsgroups.py | 23 +-
 examples/text/plot_document_clustering.py | 24 +-
 .../text/plot_hashing_vs_dict_vectorizer.py | 2 +-
 examples/tree/plot_cost_complexity_pruning.py | 3 +-
 examples/tree/plot_iris_dtc.py | 6 +-
 examples/tree/plot_tree_regression.py | 4 +-
 .../tree/plot_tree_regression_multioutput.py | 3 +-
 examples/tree/plot_unveil_tree_structure.py | 4 +-
 maint_tools/check_pxd_in_installation.py | 5 +-
 maint_tools/sort_whats_new.py | 2 +-
 maint_tools/update_tracking_issue.py | 4 +-
 setup.py | 18 +-
 sklearn/__init__.py | 7 +-
 sklearn/_build_utils/__init__.py | 11 +-
 sklearn/_build_utils/openmp_helpers.py | 3 +-
 sklearn/_build_utils/pre_build_helpers.py | 8 +-
 sklearn/_config.py | 2 +-
 sklearn/_isotonic.pyx | 3 +-
 sklearn/_loss/__init__.py | 11 +-
 sklearn/_loss/glm_distribution.py | 3 +-
 sklearn/_loss/link.py | 1 +
 sklearn/_loss/loss.py | 26 +-
 sklearn/_loss/setup.py | 1 +
 sklearn/_loss/tests/test_glm_distribution.py | 15 +-
 sklearn/_loss/tests/test_link.py | 10 +-
 sklearn/_loss/tests/test_loss.py | 14 +-
 sklearn/_min_dependencies.py | 3 +-
 sklearn/base.py | 28 +-
 sklearn/calibration.py | 35 +-
 sklearn/cluster/__init__.py | 20 +-
 sklearn/cluster/_affinity_propagation.py | 10 +-
 sklearn/cluster/_agglomerative.py | 2 +-
 sklearn/cluster/_bicluster.py | 11 +-
 sklearn/cluster/_birch.py | 19 +-
 sklearn/cluster/_bisect_k_means.py | 25 +-
 sklearn/cluster/_dbscan.py | 8 +-
 sklearn/cluster/_dbscan_inner.pyx | 2 +-
 sklearn/cluster/_feature_agglomeration.py | 2 +-
 sklearn/cluster/_hierarchical_fast.pyx | 14 +-
 sklearn/cluster/_k_means_common.pyx | 2 +-
 sklearn/cluster/_k_means_elkan.pyx | 22 +-
 sklearn/cluster/_k_means_lloyd.pyx | 24 +-
 sklearn/cluster/_k_means_minibatch.pyx | 2 +-
 sklearn/cluster/_kmeans.py | 53 +-
 sklearn/cluster/_mean_shift.py | 16 +-
 sklearn/cluster/_optics.py | 9 +-
 sklearn/cluster/_spectral.py | 7 +-
 sklearn/cluster/tests/common.py | 1 -
 .../tests/test_affinity_propagation.py | 12 +-
 sklearn/cluster/tests/test_bicluster.py | 28 +-
 sklearn/cluster/tests/test_birch.py | 16 +-
 sklearn/cluster/tests/test_bisect_k_means.py | 2 +-
 sklearn/cluster/tests/test_dbscan.py | 17 +-
 .../tests/test_feature_agglomeration.py | 4 +-
 sklearn/cluster/tests/test_hierarchical.py | 40 +-
 sklearn/cluster/tests/test_k_means.py | 41 +-
 sklearn/cluster/tests/test_mean_shift.py | 19 +-
 sklearn/cluster/tests/test_optics.py | 17 +-
 sklearn/cluster/tests/test_spectral.py | 18 +-
 sklearn/compose/__init__.py | 3 +-
 sklearn/compose/_column_transformer.py | 19 +-
 sklearn/compose/_target.py | 8 +-
 .../compose/tests/test_column_transformer.py | 23 +-
 sklearn/compose/tests/test_target.py | 23 +-
 sklearn/conftest.py | 29 +-
 sklearn/covariance/__init__.py | 15 +-
 sklearn/covariance/_elliptic_envelope.py | 7 +-
 sklearn/covariance/_empirical_covariance.py | 3 +-
 sklearn/covariance/_graph_lasso.py | 15 +-
 sklearn/covariance/_robust_covariance.py | 7 +-
 sklearn/covariance/_shrunk_covariance.py | 4 +-
 sklearn/covariance/tests/test_covariance.py | 17 +-
 .../tests/test_elliptic_envelope.py | 8 +-
 .../covariance/tests/test_graphical_lasso.py | 19 +-
 .../tests/test_robust_covariance.py | 6 +-
 sklearn/cross_decomposition/__init__.py | 2 +-
 sklearn/cross_decomposition/_pls.py | 19 +-
 sklearn/cross_decomposition/tests/test_pls.py | 13 +-
 sklearn/datasets/__init__.py | 89 +-
 sklearn/datasets/_arff_parser.py | 6 +-
 sklearn/datasets/_base.py | 17 +-
 sklearn/datasets/_california_housing.py | 21 +-
 sklearn/datasets/_covtype.py | 21 +-
 sklearn/datasets/_kddcup99.py | 19 +-
 sklearn/datasets/_lfw.py | 13 +-
 sklearn/datasets/_olivetti_faces.py | 10 +-
 sklearn/datasets/_openml.py | 8 +-
 sklearn/datasets/_rcv1.py | 16 +-
 sklearn/datasets/_samples_generator.py | 4 +-
 sklearn/datasets/_species_distributions.py | 8 +-
 sklearn/datasets/_svmlight_format_fast.pyx | 3 +-
 sklearn/datasets/_svmlight_format_io.py | 5 +-
 sklearn/datasets/_twenty_newsgroups.py | 28 +-
 sklearn/datasets/setup.py | 3 +-
 sklearn/datasets/tests/conftest.py | 1 +
 sklearn/datasets/tests/test_20news.py | 13 +-
 sklearn/datasets/tests/test_arff_parser.py | 5 +-
 sklearn/datasets/tests/test_base.py | 37 +-
 .../datasets/tests/test_california_housing.py | 3 +-
 sklearn/datasets/tests/test_common.py | 2 +-
 sklearn/datasets/tests/test_covtype.py | 2 +
 sklearn/datasets/tests/test_kddcup99.py | 9 +-
 sklearn/datasets/tests/test_lfw.py | 11 +-
 sklearn/datasets/tests/test_olivetti_faces.py | 3 +-
 sklearn/datasets/tests/test_openml.py | 18 +-
 sklearn/datasets/tests/test_rcv1.py | 9 +-
 .../datasets/tests/test_samples_generator.py | 50 +-
 .../datasets/tests/test_svmlight_format.py | 20 +-
 sklearn/decomposition/__init__.py | 27 +-
 sklearn/decomposition/_base.py | 3 +-
 sklearn/decomposition/_dict_learning.py | 20 +-
 sklearn/decomposition/_factor_analysis.py | 6 +-
 sklearn/decomposition/_fastica.py | 3 +-
 sklearn/decomposition/_incremental_pca.py | 4 +-
 sklearn/decomposition/_kernel_pca.py | 19 +-
 sklearn/decomposition/_lda.py | 7 +-
 sklearn/decomposition/_nmf.py | 16 +-
 sklearn/decomposition/_pca.py | 9 +-
 sklearn/decomposition/_sparse_pca.py | 4 +-
 sklearn/decomposition/_truncated_svd.py | 1 +
 sklearn/decomposition/setup.py | 1 +
 .../decomposition/tests/test_dict_learning.py | 50 +-
 .../tests/test_factor_analysis.py | 10 +-
 sklearn/decomposition/tests/test_fastica.py | 8 +-
 .../tests/test_incremental_pca.py | 15 +-
 .../decomposition/tests/test_kernel_pca.py | 23 +-
 sklearn/decomposition/tests/test_nmf.py | 24 +-
 .../decomposition/tests/test_online_lda.py | 19 +-
 sklearn/decomposition/tests/test_pca.py | 14 +-
 .../decomposition/tests/test_sparse_pca.py | 13 +-
 .../decomposition/tests/test_truncated_svd.py | 7 +-
 sklearn/discriminant_analysis.py | 21 +-
 sklearn/dummy.py | 20 +-
 sklearn/ensemble/__init__.py | 31 +-
 sklearn/ensemble/_bagging.py | 17 +-
 sklearn/ensemble/_base.py | 14 +-
 sklearn/ensemble/_forest.py | 30 +-
 sklearn/ensemble/_gb.py | 46 +-
 sklearn/ensemble/_gb_losses.py | 6 +-
 sklearn/ensemble/_gradient_boosting.pyx | 17 +-
 .../_hist_gradient_boosting/_binning.pyx | 5 +-
 .../_hist_gradient_boosting/_bitset.pxd | 6 +-
 .../_hist_gradient_boosting/_bitset.pyx | 6 +-
 .../_gradient_boosting.pyx | 4 +-
 .../_hist_gradient_boosting/_predictor.pyx | 14 +-
 .../_hist_gradient_boosting/binning.py | 8 +-
 .../gradient_boosting.py | 27 +-
 .../_hist_gradient_boosting/grower.py | 25 +-
 .../_hist_gradient_boosting/histogram.pyx | 8 +-
 .../_hist_gradient_boosting/predictor.py | 8 +-
 .../_hist_gradient_boosting/splitting.pyx | 20 +-
 .../tests/test_binning.py | 10 +-
 .../tests/test_bitset.py | 4 +-
 .../tests/test_compare_lightgbm.py | 12 +-
 .../tests/test_gradient_boosting.py | 29 +-
 .../tests/test_grower.py | 19 +-
 .../tests/test_histogram.py | 16 +-
 .../tests/test_monotonic_contraints.py | 16 +-
 .../tests/test_predictor.py | 22 +-
 .../tests/test_splitting.py | 14 +-
 .../tests/test_warm_start.py | 12 +-
 .../_hist_gradient_boosting/utils.pyx | 13 +-
 sklearn/ensemble/_iforest.py | 15 +-
 sklearn/ensemble/_stacking.py | 42 +-
 sklearn/ensemble/_voting.py | 23 +-
 sklearn/ensemble/_weight_boosting.py | 27 +-
 sklearn/ensemble/tests/test_bagging.py | 29 +-
 sklearn/ensemble/tests/test_base.py | 7 +-
 sklearn/ensemble/tests/test_common.py | 30 +-
 sklearn/ensemble/tests/test_forest.py | 64 +-
 .../ensemble/tests/test_gradient_boosting.py | 40 +-
 .../test_gradient_boosting_loss_functions.py | 25 +-
 sklearn/ensemble/tests/test_iforest.py | 25 +-
 sklearn/ensemble/tests/test_stacking.py | 67 +-
 sklearn/ensemble/tests/test_voting.py | 39 +-
 .../ensemble/tests/test_weight_boosting.py | 32 +-
 .../experimental/enable_halving_search_cv.py | 5 +-
 .../enable_hist_gradient_boosting.py | 1 -
 .../experimental/enable_iterative_imputer.py | 2 +-
 sklearn/externals/_arff.py | 511 +--
 sklearn/externals/_lobpcg.py | 68 +-
 sklearn/externals/_numpy_compiler_patch.py | 4 +-
 sklearn/feature_extraction/__init__.py | 4 +-
 .../feature_extraction/_dict_vectorizer.py | 4 +-
 sklearn/feature_extraction/_hashing_fast.pyx | 8 +-
 sklearn/feature_extraction/image.py | 7 +-
 .../tests/test_dict_vectorizer.py | 7 +-
 .../tests/test_feature_hasher.py | 2 +-
 .../feature_extraction/tests/test_image.py | 10 +-
 sklearn/feature_extraction/tests/test_text.py | 48 +-
 sklearn/feature_extraction/text.py | 17 +-
 sklearn/feature_selection/__init__.py | 40 +-
 sklearn/feature_selection/_base.py | 8 +-
 sklearn/feature_selection/_from_model.py | 12 +-
 sklearn/feature_selection/_mutual_info.py | 4 +-
 sklearn/feature_selection/_rfe.py | 26 +-
 sklearn/feature_selection/_sequential.py | 7 +-
 .../_univariate_selection.py | 6 +-
 .../feature_selection/_variance_threshold.py | 3 +-
 sklearn/feature_selection/tests/test_base.py | 3 +-
 sklearn/feature_selection/tests/test_chi2.py | 5 +-
 .../tests/test_feature_select.py | 33 +-
 .../tests/test_from_model.py | 32 +-
 .../tests/test_mutual_info.py | 9 +-
 sklearn/feature_selection/tests/test_rfe.py | 25 +-
 .../tests/test_sequential.py | 12 +-
 .../tests/test_variance_threshold.py | 4 +-
 sklearn/gaussian_process/__init__.py | 5 +-
 sklearn/gaussian_process/_gpc.py | 12 +-
 sklearn/gaussian_process/_gpr.py | 8 +-
 sklearn/gaussian_process/kernels.py | 13 +-
 .../tests/_mini_sequence_kernel.py | 10 +-
 sklearn/gaussian_process/tests/test_gpc.py | 17 +-
 sklearn/gaussian_process/tests/test_gpr.py | 20 +-
 .../gaussian_process/tests/test_kernels.py | 38 +-
 sklearn/impute/_base.py | 9 +-
 sklearn/impute/_iterative.py | 17 +-
 sklearn/impute/_knn.py | 9 +-
 sklearn/impute/tests/test_base.py | 3 +-
 sklearn/impute/tests/test_common.py | 18 +-
 sklearn/impute/tests/test_impute.py | 34 +-
 sklearn/impute/tests/test_knn.py | 3 +-
 sklearn/inspection/__init__.py | 7 +-
 sklearn/inspection/_partial_dependence.py | 23 +-
 sklearn/inspection/_permutation_importance.py | 6 +-
 sklearn/inspection/_plot/decision_boundary.py | 7 +-
 .../inspection/_plot/partial_dependence.py | 9 +-
 .../tests/test_boundary_decision_display.py | 20 +-
 .../tests/test_plot_partial_dependence.py | 24 +-
 .../tests/test_partial_dependence.py | 47 +-
 .../tests/test_permutation_importance.py | 30 +-
 sklearn/isotonic.py | 10 +-
 sklearn/kernel_approximation.py | 14 +-
 sklearn/kernel_ridge.py | 6 +-
 sklearn/linear_model/__init__.py | 46 +-
 sklearn/linear_model/_base.py | 28 +-
 sklearn/linear_model/_bayes.py | 7 +-
 sklearn/linear_model/_cd_fast.pyx | 24 +-
 sklearn/linear_model/_coordinate_descent.py | 15 +-
 sklearn/linear_model/_glm/__init__.py | 4 +-
 sklearn/linear_model/_glm/glm.py | 6 +-
 sklearn/linear_model/_glm/tests/test_glm.py | 18 +-
 sklearn/linear_model/_huber.py | 5 +-
 sklearn/linear_model/_least_angle.py | 17 +-
 sklearn/linear_model/_linear_loss.py | 1 +
 sklearn/linear_model/_logistic.py | 34 +-
 sklearn/linear_model/_omp.py | 7 +-
 sklearn/linear_model/_passive_aggressive.py | 4 +-
 sklearn/linear_model/_quantile.py | 4 +-
 sklearn/linear_model/_ransac.py | 19 +-
 sklearn/linear_model/_ridge.py | 43 +-
 sklearn/linear_model/_sag.py | 6 +-
 sklearn/linear_model/_sgd_fast.pyx | 9 +-
 sklearn/linear_model/_stochastic_gradient.py | 44 +-
 sklearn/linear_model/_theil_sen.py | 11 +-
 sklearn/linear_model/setup.py | 1 +
 sklearn/linear_model/tests/test_base.py | 33 +-
 sklearn/linear_model/tests/test_bayes.py | 14 +-
 sklearn/linear_model/tests/test_common.py | 23 +-
 .../tests/test_coordinate_descent.py | 57 +-
 sklearn/linear_model/tests/test_huber.py | 11 +-
 .../linear_model/tests/test_least_angle.py | 29 +-
 .../linear_model/tests/test_linear_loss.py | 9 +-
 sklearn/linear_model/tests/test_logistic.py | 41 +-
 sklearn/linear_model/tests/test_omp.py | 23 +-
 .../tests/test_passive_aggressive.py | 19 +-
 sklearn/linear_model/tests/test_perceptron.py | 7 +-
 sklearn/linear_model/tests/test_quantile.py | 5 +-
 sklearn/linear_model/tests/test_ransac.py | 19 +-
 sklearn/linear_model/tests/test_ridge.py | 89 +-
 sklearn/linear_model/tests/test_sag.py | 25 +-
 sklearn/linear_model/tests/test_sgd.py | 37 +-
 .../tests/test_sparse_coordinate_descent.py | 15 +-
 sklearn/linear_model/tests/test_theil_sen.py | 16 +-
 sklearn/manifold/__init__.py | 2 +-
 sklearn/manifold/_barnes_hut_tsne.pyx | 7 +-
 sklearn/manifold/_isomap.py | 9 +-
 sklearn/manifold/_locally_linear.py | 13 +-
 sklearn/manifold/_mds.py | 9 +-
 sklearn/manifold/_spectral_embedding.py | 12 +-
 sklearn/manifold/_t_sne.py | 17 +-
 sklearn/manifold/_utils.pyx | 4 +-
 sklearn/manifold/tests/test_isomap.py | 14 +-
 sklearn/manifold/tests/test_locally_linear.py | 12 +-
 sklearn/manifold/tests/test_mds.py | 2 +-
 .../manifold/tests/test_spectral_embedding.py | 24 +-
 sklearn/manifold/tests/test_t_sne.py | 56 +-
 sklearn/metrics/__init__.py | 180 +-
 sklearn/metrics/_classification.py | 26 +-
 sklearn/metrics/_dist_metrics.pxd | 3 +-
 sklearn/metrics/_dist_metrics.pyx | 14 +-
 .../metrics/_pairwise_distances_reduction.pyx | 48 +-
 sklearn/metrics/_plot/confusion_matrix.py | 7 +-
 sklearn/metrics/_plot/det_curve.py | 9 +-
 .../metrics/_plot/precision_recall_curve.py | 8 +-
 sklearn/metrics/_plot/roc_curve.py | 9 +-
 sklearn/metrics/_plot/tests/test_base.py | 3 +-
 .../_plot/tests/test_common_curve_display.py | 7 +-
 .../tests/test_confusion_matrix_display.py | 12 +-
 .../_plot/tests/test_det_curve_display.py | 6 +-
 .../_plot/tests/test_plot_confusion_matrix.py | 15 +-
 .../_plot/tests/test_plot_curve_common.py | 7 +-
 .../_plot/tests/test_plot_det_curve.py | 6 +-
 .../_plot/tests/test_plot_precision_recall.py | 19 +-
 .../_plot/tests/test_plot_roc_curve.py | 13 +-
 .../tests/test_precision_recall_display.py | 9 +-
 .../_plot/tests/test_roc_curve_display.py | 14 +-
 sklearn/metrics/_ranking.py | 18 +-
 sklearn/metrics/_regression.py | 7 +-
 sklearn/metrics/_scorer.py | 64 +-
 sklearn/metrics/cluster/__init__.py | 40 +-
 sklearn/metrics/cluster/_bicluster.py | 2 +-
 .../cluster/_expected_mutual_info_fast.pyx | 6 +-
 sklearn/metrics/cluster/_supervised.py | 2 +-
 sklearn/metrics/cluster/_unsupervised.py | 7 +-
 .../metrics/cluster/tests/test_bicluster.py | 5 +-
 sklearn/metrics/cluster/tests/test_common.py | 30 +-
 .../metrics/cluster/tests/test_supervised.py | 37 +-
 .../cluster/tests/test_unsupervised.py | 14 +-
 sklearn/metrics/pairwise.py | 28 +-
 sklearn/metrics/setup.py | 2 +-
 sklearn/metrics/tests/test_classification.py | 83 +-
 sklearn/metrics/tests/test_common.py | 114 +-
 sklearn/metrics/tests/test_dist_metrics.py | 9 +-
 sklearn/metrics/tests/test_pairwise.py | 89 +-
 .../test_pairwise_distances_reduction.py | 9 +-
 sklearn/metrics/tests/test_ranking.py | 65 +-
 sklearn/metrics/tests/test_regression.py | 52 +-
 sklearn/metrics/tests/test_score_objects.py | 65 +-
 sklearn/mixture/__init__.py | 3 +-
 sklearn/mixture/_base.py | 3 +-
 sklearn/mixture/_bayesian_mixture.py | 17 +-
 sklearn/mixture/_gaussian_mixture.py | 4 +-
 .../mixture/tests/test_bayesian_mixture.py | 20 +-
 .../mixture/tests/test_gaussian_mixture.py | 31 +-
 sklearn/mixture/tests/test_mixture.py | 5 +-
 sklearn/model_selection/__init__.py | 61 +-
 sklearn/model_selection/_search.py | 40 +-
 .../_search_successive_halving.py | 10 +-
 sklearn/model_selection/_split.py | 18 +-
 sklearn/model_selection/_validation.py | 23 +-
 sklearn/model_selection/tests/test_search.py | 112 +-
 sklearn/model_selection/tests/test_split.py | 81 +-
 .../tests/test_successive_halving.py | 25 +-
 .../model_selection/tests/test_validation.py | 135 +-
 sklearn/multiclass.py | 32 +-
 sklearn/multioutput.py | 20 +-
 sklearn/naive_bayes.py | 10 +-
 sklearn/neighbors/__init__.py | 21 +-
 sklearn/neighbors/_base.py | 29 +-
 sklearn/neighbors/_binary_tree.pxi | 17 +-
 sklearn/neighbors/_classification.py | 16 +-
 sklearn/neighbors/_graph.py | 5 +-
 sklearn/neighbors/_kde.py | 8 +-
 sklearn/neighbors/_lof.py | 9 +-
 sklearn/neighbors/_nca.py | 18 +-
 sklearn/neighbors/_nearest_centroid.py | 5 +-
 sklearn/neighbors/_partition_nodes.pxd | 1 +
 sklearn/neighbors/_quad_tree.pyx | 12 +-
 sklearn/neighbors/_regression.py | 9 +-
 sklearn/neighbors/_unsupervised.py | 4 +-
 sklearn/neighbors/tests/test_ball_tree.py | 3 +-
 sklearn/neighbors/tests/test_kd_tree.py | 4 +-
 sklearn/neighbors/tests/test_kde.py | 13 +-
 sklearn/neighbors/tests/test_lof.py | 16 +-
 sklearn/neighbors/tests/test_nca.py | 13 +-
 .../neighbors/tests/test_nearest_centroid.py | 4 +-
 sklearn/neighbors/tests/test_neighbors.py | 48 +-
 .../tests/test_neighbors_pipeline.py | 25 +-
 .../neighbors/tests/test_neighbors_tree.py | 29 +-
 sklearn/neighbors/tests/test_quad_tree.py | 2 +-
 sklearn/neural_network/__init__.py | 4 +-
 sklearn/neural_network/_base.py | 1 -
 .../neural_network/_multilayer_perceptron.py | 41 +-
 sklearn/neural_network/_rbm.py | 10 +-
 sklearn/neural_network/tests/test_base.py | 5 +-
 sklearn/neural_network/tests/test_mlp.py | 30 +-
 sklearn/neural_network/tests/test_rbm.py | 14 +-
 .../tests/test_stochastic_optimizers.py | 3 +-
 sklearn/pipeline.py | 19 +-
 sklearn/preprocessing/__init__.py | 57 +-
 .../_csr_polynomial_expansion.pyx | 3 +-
 sklearn/preprocessing/_data.py | 22 +-
 sklearn/preprocessing/_discretization.py | 16 +-
 sklearn/preprocessing/_encoders.py | 9 +-
 sklearn/preprocessing/_label.py | 13 +-
 sklearn/preprocessing/_polynomial.py | 10 +-
 sklearn/preprocessing/tests/test_common.py | 38 +-
 sklearn/preprocessing/tests/test_data.py | 83 +-
 .../tests/test_discretization.py | 10 +-
 sklearn/preprocessing/tests/test_encoders.py | 14 +-
 .../tests/test_function_transformer.py | 8 +-
 sklearn/preprocessing/tests/test_label.py | 41 +-
 .../preprocessing/tests/test_polynomial.py | 6 +-
 sklearn/random_projection.py | 8 +-
 sklearn/semi_supervised/_label_propagation.py | 5 +-
 sklearn/semi_supervised/_self_training.py | 6 +-
 .../tests/test_label_propagation.py | 14 +-
 .../tests/test_self_training.py | 11 +-
 sklearn/setup.py | 4 +-
 sklearn/svm/__init__.py | 2 +-
 sklearn/svm/_base.py | 34 +-
 sklearn/svm/_bounds.py | 2 +-
 sklearn/svm/_classes.py | 11 +-
 sklearn/svm/_liblinear.pyx | 7 +-
 sklearn/svm/_libsvm.pyx | 5 +-
 sklearn/svm/_libsvm_sparse.pyx | 8 +-
 sklearn/svm/setup.py | 1 +
 sklearn/svm/tests/test_bounds.py | 10 +-
 sklearn/svm/tests/test_sparse.py | 12 +-
 sklearn/svm/tests/test_svm.py | 37 +-
 sklearn/tests/random_seed.py | 3 +-
 sklearn/tests/test_base.py | 35 +-
 sklearn/tests/test_build.py | 3 +-
 sklearn/tests/test_calibration.py | 59 +-
 sklearn/tests/test_common.py | 48 +-
 sklearn/tests/test_config.py | 4 +-
 sklearn/tests/test_discriminant_analysis.py | 31 +-
 sklearn/tests/test_docstring_parameters.py | 30 +-
 sklearn/tests/test_docstrings.py | 11 +-
 sklearn/tests/test_dummy.py | 16 +-
 sklearn/tests/test_isotonic.py | 20 +-
 sklearn/tests/test_kernel_approximation.py | 26 +-
 sklearn/tests/test_kernel_ridge.py | 7 +-
 sklearn/tests/test_metaestimators.py | 27 +-
 sklearn/tests/test_min_dependencies_readme.py | 3 +-
 sklearn/tests/test_multiclass.py | 56 +-
 sklearn/tests/test_multioutput.py | 56 +-
 sklearn/tests/test_naive_bayes.py | 28 +-
 sklearn/tests/test_pipeline.py | 49 +-
 sklearn/tests/test_random_projection.py | 32 +-
 sklearn/tree/__init__.py | 14 +-
 sklearn/tree/_classes.py | 42 +-
 sklearn/tree/_criterion.pxd | 12 +-
 sklearn/tree/_criterion.pyx | 9 +-
 sklearn/tree/_export.py | 9 +-
 sklearn/tree/_splitter.pxd | 11 +-
 sklearn/tree/_splitter.pyx | 17 +-
 sklearn/tree/_tree.pxd | 5 +-
 sklearn/tree/_tree.pyx | 21 +-
 sklearn/tree/_utils.pxd | 4 +-
 sklearn/tree/_utils.pyx | 6 +-
 sklearn/tree/tests/test_export.py | 13 +-
 sklearn/tree/tests/test_reingold_tilford.py | 3 +-
 sklearn/tree/tests/test_tree.py | 80 +-
 sklearn/utils/__init__.py | 41 +-
 sklearn/utils/_cython_blas.pyx | 37 +-
 sklearn/utils/_encode.py | 3 +-
 sklearn/utils/_estimator_html_repr.py | 5 +-
 sklearn/utils/_fast_dict.pxd | 3 +-
 sklearn/utils/_fast_dict.pyx | 11 +-
 sklearn/utils/_joblib.py | 21 +-
 sklearn/utils/_logistic_sigmoid.pyx | 3 +-
 sklearn/utils/_mask.py | 3 +-
 sklearn/utils/_mocking.py | 3 +-
 sklearn/utils/_pprint.py | 2 +-
 sklearn/utils/_random.pxd | 2 +
 sklearn/utils/_random.pyx | 3 +
 sklearn/utils/_readonly_array_wrapper.pyx | 4 +-
 sklearn/utils/_show_versions.py | 9 +-
 sklearn/utils/_sorting.pxd | 3 +-
 sklearn/utils/_sorting.pyx | 1 +
 sklearn/utils/_testing.py | 51 +-
 sklearn/utils/_vector_sentinel.pxd | 4 +-
 sklearn/utils/_vector_sentinel.pyx | 6 +-
 sklearn/utils/arrayfuncs.pyx | 10 +-
 sklearn/utils/class_weight.py | 1 -
 sklearn/utils/deprecation.py | 3 +-
 sklearn/utils/estimator_checks.py | 91 +-
 sklearn/utils/fixes.py | 14 +-
 sklearn/utils/graph.py | 2 +-
 sklearn/utils/metaestimators.py | 13 +-
 sklearn/utils/multiclass.py | 9 +-
 sklearn/utils/murmurhash.pyx | 2 +
 sklearn/utils/optimize.py | 5 +-
 sklearn/utils/random.py | 3 +-
 sklearn/utils/sparsefuncs.py | 10 +-
 sklearn/utils/sparsefuncs_fast.pyx | 4 +-
 sklearn/utils/tests/test_arrayfuncs.py | 2 +-
 sklearn/utils/tests/test_class_weight.py | 7 +-
 sklearn/utils/tests/test_cython_blas.py | 33 +-
 sklearn/utils/tests/test_cython_templating.py | 2 +
 sklearn/utils/tests/test_deprecation.py | 4 +-
 sklearn/utils/tests/test_encode.py | 5 +-
 sklearn/utils/tests/test_estimator_checks.py | 50 +-
 .../utils/tests/test_estimator_html_repr.py | 40 +-
 sklearn/utils/tests/test_extmath.py | 51 +-
 sklearn/utils/tests/test_fixes.py | 4 +-
 sklearn/utils/tests/test_graph.py | 4 +-
 sklearn/utils/tests/test_metaestimators.py | 9 +-
 sklearn/utils/tests/test_mocking.py | 10 +-
 sklearn/utils/tests/test_multiclass.py | 51 +-
 sklearn/utils/tests/test_murmurhash.py | 4 +-
 sklearn/utils/tests/test_optimize.py | 3 +-
 sklearn/utils/tests/test_parallel.py | 3 +-
 sklearn/utils/tests/test_pprint.py | 9 +-
 sklearn/utils/tests/test_random.py | 4 +-
 sklearn/utils/tests/test_readonly_wrapper.py | 1 -
 sklearn/utils/tests/test_seq_dataset.py | 4 +-
 sklearn/utils/tests/test_shortest_path.py | 1 +
 sklearn/utils/tests/test_show_versions.py | 6 +-
 sklearn/utils/tests/test_sparsefuncs.py | 19 +-
 sklearn/utils/tests/test_tags.py | 5 +-
 sklearn/utils/tests/test_testing.py | 41 +-
 sklearn/utils/tests/test_utils.py | 51 +-
 sklearn/utils/tests/test_validation.py | 87 +-
 sklearn/utils/tests/test_weight_vector.py | 6 +-
 sklearn/utils/validation.py | 18 +-
 828 files changed, 8016 insertions(+), 7304 deletions(-)
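
[Editor's note, not part of the patch] Every hunk that follows makes the same mechanical change: isort rewrites each module's import block into ordered sections (standard library, then third-party packages, then first-party code, i.e. `sklearn` itself in this repository), alphabetized within each section, and collapses repeated `from`-imports into combined, parenthesized ones where a line would otherwise grow too long. An illustrative block in the resulting layout (assembled for illustration only; it is not one of the patched files):

# 1) standard library, alphabetized
import argparse
from time import time

# 2) third-party packages
import numpy as np

# 3) first-party code: sklearn itself in this repository
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split
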
diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py
index ddf9bda3492de..a022c3c4dd2a7 100644
--- a/.github/scripts/label_title_regex.py
+++ b/.github/scripts/label_title_regex.py
@@ -1,10 +1,11 @@
 """Labels PRs based on title.
 Must be run in a github action with the pull_request_target event."""
-from github import Github
-import os
 import json
+import os
 import re
 
+from github import Github
+
 context_dict = json.loads(os.getenv("CONTEXT_GITHUB"))
 
 repo = context_dict["repository"]
diff --git a/asv_benchmarks/benchmarks/cluster.py b/asv_benchmarks/benchmarks/cluster.py
index 5973947a7d295..ebf64d07e557b 100644
--- a/asv_benchmarks/benchmarks/cluster.py
+++ b/asv_benchmarks/benchmarks/cluster.py
@@ -1,7 +1,7 @@
 from sklearn.cluster import KMeans, MiniBatchKMeans
 
 from .common import Benchmark, Estimator, Predictor, Transformer
-from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset
+from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset
 from .utils import neg_mean_inertia
 
 
diff --git a/asv_benchmarks/benchmarks/common.py b/asv_benchmarks/benchmarks/common.py
index c3e114a212047..aeea558844587 100644
--- a/asv_benchmarks/benchmarks/common.py
+++ b/asv_benchmarks/benchmarks/common.py
@@ -1,11 +1,11 @@
-import os
+import itertools
 import json
-import timeit
+import os
 import pickle
-import itertools
+import timeit
 from abc import ABC, abstractmethod
-from pathlib import Path
 from multiprocessing import cpu_count
+from pathlib import Path
 
 import numpy as np
 
diff --git a/asv_benchmarks/benchmarks/datasets.py b/asv_benchmarks/benchmarks/datasets.py
index dbe0eac0b822c..b8d2fdc9bad21 100644
--- a/asv_benchmarks/benchmarks/datasets.py
+++ b/asv_benchmarks/benchmarks/datasets.py
@@ -1,21 +1,22 @@
+from pathlib import Path
+
 import numpy as np
 import scipy.sparse as sp
-from joblib import Memory
-from pathlib import Path
+from joblib import Memory
 
-from sklearn.decomposition import TruncatedSVD
 from sklearn.datasets import (
-    make_blobs,
     fetch_20newsgroups,
+    fetch_olivetti_faces,
     fetch_openml,
     load_digits,
-    make_regression,
+    make_blobs,
     make_classification,
-    fetch_olivetti_faces,
+    make_regression,
 )
-from sklearn.preprocessing import MaxAbsScaler, StandardScaler
+from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MaxAbsScaler, StandardScaler
 
 # memory location for caching datasets
 M = Memory(location=str(Path(__file__).resolve().parent / "cache"))
diff --git a/asv_benchmarks/benchmarks/decomposition.py b/asv_benchmarks/benchmarks/decomposition.py
index b5e71cdd0b556..d986222758970 100644
--- a/asv_benchmarks/benchmarks/decomposition.py
+++ b/asv_benchmarks/benchmarks/decomposition.py
@@ -1,8 +1,8 @@
 from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning
 
 from .common import Benchmark, Estimator, Transformer
-from .datasets import _olivetti_faces_dataset, _mnist_dataset
-from .utils import make_pca_scorers, make_dict_learning_scorers
+from .datasets import _mnist_dataset, _olivetti_faces_dataset
+from .utils import make_dict_learning_scorers, make_pca_scorers
 
 
 class PCABenchmark(Transformer, Estimator, Benchmark):
diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
index 8c5a28e3da90f..c336d1e5f8805 100644
--- a/asv_benchmarks/benchmarks/ensemble.py
+++ b/asv_benchmarks/benchmarks/ensemble.py
@@ -1,7 +1,7 @@
 from sklearn.ensemble import (
-    RandomForestClassifier,
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
+    RandomForestClassifier,
 )
 
 from .common import Benchmark, Estimator, Predictor
diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py
index 663ceca61d063..8ed9b02ec4dfe 100644
--- a/asv_benchmarks/benchmarks/linear_model.py
+++ b/asv_benchmarks/benchmarks/linear_model.py
@@ -1,9 +1,9 @@
 from sklearn.linear_model import (
-    LogisticRegression,
-    Ridge,
     ElasticNet,
     Lasso,
     LinearRegression,
+    LogisticRegression,
+    Ridge,
     SGDRegressor,
 )
 
diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py
index cf38bc73a38ec..d63a476a8e438 100644
--- a/benchmarks/bench_20newsgroups.py
+++ b/benchmarks/bench_20newsgroups.py
@@ -1,18 +1,19 @@
-from time import time
 import argparse
-import numpy as np
+from time import time
 
-from sklearn.dummy import DummyClassifier
+import numpy as np
 
 from sklearn.datasets import fetch_20newsgroups_vectorized
-from sklearn.metrics import accuracy_score
-from sklearn.utils.validation import check_array
-
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import AdaBoostClassifier
+from sklearn.dummy import DummyClassifier
+from sklearn.ensemble import (
+    AdaBoostClassifier,
+    ExtraTreesClassifier,
+    RandomForestClassifier,
+)
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
 from sklearn.naive_bayes import MultinomialNB
+from sklearn.utils.validation import check_array
 
 ESTIMATORS = {
     "dummy": DummyClassifier(),
diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py
index 8a13a2d9806c6..ca8b58780d946 100644
--- a/benchmarks/bench_covertype.py
+++ b/benchmarks/bench_covertype.py
@@ -45,20 +45,24 @@
 # Arnaud Joly
 # License: BSD 3 clause
 
+import argparse
 import os
 from time import time
-import argparse
+
 import numpy as np
-from joblib import Memory
 
+from joblib import Memory
 from sklearn.datasets import fetch_covtype, get_data_home
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import SGDClassifier, LogisticRegression
+from sklearn.ensemble import (
+    ExtraTreesClassifier,
+    GradientBoostingClassifier,
+    RandomForestClassifier,
+)
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.metrics import zero_one_loss
 from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import LinearSVC
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-from sklearn.metrics import zero_one_loss
 from sklearn.utils import check_array
 
 # Memoize the data extraction and memory map the resulting
diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py
index 98fa17b99f47a..2b48a43a1b8d0 100644
--- a/benchmarks/bench_feature_expansions.py
+++ b/benchmarks/bench_feature_expansions.py
@@ -1,8 +1,10 @@
+from time import time
+
 import matplotlib.pyplot as plt
 import numpy as np
 import scipy.sparse as sparse
+
 from sklearn.preprocessing import PolynomialFeatures
-from time import time
 
 degree = 2
 trials = 3
diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py
index 06ca4d1276e1c..98d1aedc7fd8a 100644
--- a/benchmarks/bench_glm.py
+++ b/benchmarks/bench_glm.py
@@ -5,9 +5,10 @@
 """
 from datetime import datetime
+
 import numpy as np
-from sklearn import linear_model
 
+from sklearn import linear_model
 
 if __name__ == "__main__":
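
[Editor's note, not part of the patch] The rewrites in these hunks can be reproduced locally once the hook from PATCH 01 is installed; `pre-commit run isort --all-files` applies them in one go. The same check is also reachable programmatically; a hedged sketch using isort 5.x's `check_file`/`file` helpers (the path is only an example):

import isort

# True if the file's imports already satisfy the configured profile;
# show_diff=True prints a unified diff for any violation it finds.
ok = isort.check_file("benchmarks/bench_covertype.py", show_diff=True)

if not ok:
    # Sort the file in place, as the pre-commit hook would.
    isort.file("benchmarks/bench_covertype.py")
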
diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py
index 8a0a0545bb627..7b111f95044e2 100644
--- a/benchmarks/bench_glmnet.py
+++ b/benchmarks/bench_glmnet.py
@@ -16,9 +16,11 @@
 In both cases, only 10% of the features are informative.
 """
-import numpy as np
 import gc
 from time import time
+
+import numpy as np
+
 from sklearn.datasets import make_regression
 
 alpha = 0.1
@@ -45,11 +47,11 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
 
 
 if __name__ == "__main__":
-    from glmnet.elastic_net import Lasso as GlmnetLasso
-    from sklearn.linear_model import Lasso as ScikitLasso
-
     # Delayed import of matplotlib.pyplot
     import matplotlib.pyplot as plt
+    from glmnet.elastic_net import Lasso as GlmnetLasso
+
+    from sklearn.linear_model import Lasso as ScikitLasso
 
     scikit_results = []
     glmnet_results = []
diff --git a/benchmarks/bench_hist_gradient_boosting.py b/benchmarks/bench_hist_gradient_boosting.py
index 163e21f98ed0d..c1dfffabe71c2 100644
--- a/benchmarks/bench_hist_gradient_boosting.py
+++ b/benchmarks/bench_hist_gradient_boosting.py
@@ -1,15 +1,16 @@
-from time import time
 import argparse
+from time import time
 
 import matplotlib.pyplot as plt
 import numpy as np
 
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_regression
-from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+    HistGradientBoostingClassifier,
+    HistGradientBoostingRegressor,
+)
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.model_selection import train_test_split
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py
index 1b5905b1cf4e8..5fa5bbae0c35c 100644
--- a/benchmarks/bench_hist_gradient_boosting_adult.py
+++ b/benchmarks/bench_hist_gradient_boosting_adult.py
@@ -4,15 +4,14 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.model_selection import train_test_split
-from sklearn.compose import make_column_transformer, make_column_selector
+from sklearn.compose import make_column_selector, make_column_transformer
 from sklearn.datasets import fetch_openml
-from sklearn.metrics import accuracy_score, roc_auc_score
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import OrdinalEncoder
 
-
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
 parser.add_argument("--n-trees", type=int, default=100)
diff --git a/benchmarks/bench_hist_gradient_boosting_categorical_only.py b/benchmarks/bench_hist_gradient_boosting_categorical_only.py
index e8d215170f9c8..1085bbc49f4f8 100644
--- a/benchmarks/bench_hist_gradient_boosting_categorical_only.py
+++ b/benchmarks/bench_hist_gradient_boosting_categorical_only.py
@@ -1,11 +1,10 @@
 import argparse
 from time import time
 
-from sklearn.preprocessing import KBinsDiscretizer
 from sklearn.datasets import make_classification
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.preprocessing import KBinsDiscretizer
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py
index abe8018adfd83..197fc6ae43844 100644
--- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py
+++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py
@@ -1,17 +1,17 @@
-from urllib.request import urlretrieve
+import argparse
 import os
 from gzip import GzipFile
 from time import time
-import argparse
+from urllib.request import urlretrieve
 
 import numpy as np
 import pandas as pd
+
 from joblib import Memory
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score, roc_auc_score
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
diff --git a/benchmarks/bench_hist_gradient_boosting_threading.py b/benchmarks/bench_hist_gradient_boosting_threading.py
index 70787fd2eb479..9acf65bdbaf6a 100644
--- a/benchmarks/bench_hist_gradient_boosting_threading.py
+++ b/benchmarks/bench_hist_gradient_boosting_threading.py
@@ -1,18 +1,19 @@
-from time import time
 import argparse
 import os
 from pprint import pprint
+from time import time
 
 import numpy as np
 from threadpoolctl import threadpool_limits
+
 import sklearn
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+    HistGradientBoostingClassifier,
+    HistGradientBoostingRegressor,
+)
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.model_selection import train_test_split
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--n-leaf-nodes", type=int, default=31)
@@ -290,8 +291,8 @@ def one_run(n_threads, n_samples):
 
 
 if args.plot or args.plot_filename:
-    import matplotlib.pyplot as plt
     import matplotlib
+    import matplotlib.pyplot as plt
 
     fig, axs = plt.subplots(2, figsize=(12, 12))
 
diff --git a/benchmarks/bench_isolation_forest.py b/benchmarks/bench_isolation_forest.py
index 968ecf20876ae..5caf31e4ec4de 100644
--- a/benchmarks/bench_isolation_forest.py
+++ b/benchmarks/bench_isolation_forest.py
@@ -17,12 +17,13 @@
 """
 
 from time import time
-import numpy as np
+
 import matplotlib.pyplot as plt
+import numpy as np
 
+from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml
 from sklearn.ensemble import IsolationForest
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
+from sklearn.metrics import auc, roc_curve
 from sklearn.preprocessing import LabelBinarizer
 from sklearn.utils import shuffle as sh
""" -import numpy as np +import argparse import gc from datetime import datetime -from sklearn.isotonic import isotonic_regression -from scipy.special import expit + import matplotlib.pyplot as plt -import argparse +import numpy as np +from scipy.special import expit + +from sklearn.isotonic import isotonic_regression def generate_perturbed_logarithm_dataset(size): diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index e4eddf9cb745a..bfd6caf374b4c 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -39,13 +39,12 @@ import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py index b6d82647012d5..e23261f4e871b 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py @@ -41,13 +41,12 @@ import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py index 50d1b5466a345..524ac7dfbad63 100644 --- a/benchmarks/bench_lasso.py +++ b/benchmarks/bench_lasso.py @@ -13,6 +13,7 @@ """ import gc from time import time + import numpy as np from sklearn.datasets import make_regression @@ -61,9 +62,10 @@ def compute_bench(alpha, n_samples, n_features, precompute): if __name__ == "__main__": - from sklearn.linear_model import Lasso, LassoLars import matplotlib.pyplot as plt + from sklearn.linear_model import Lasso, LassoLars + alpha = 0.01 # regularization parameter n_features = 10 diff --git a/benchmarks/bench_lof.py b/benchmarks/bench_lof.py index 31057e2e4067b..8652073a7203d 100644 --- a/benchmarks/bench_lof.py +++ b/benchmarks/bench_lof.py @@ -18,11 +18,13 @@ """ from time import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np + +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml +from sklearn.metrics import auc, roc_curve from sklearn.neighbors import LocalOutlierFactor -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml from sklearn.preprocessing import LabelBinarizer print(__doc__) diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py index c50bfc2e594d6..5eca252282d11 100644 --- a/benchmarks/bench_mnist.py +++ b/benchmarks/bench_mnist.py @@ -30,26 +30,24 @@ # Arnaud Joly # License: BSD 3 clause +import argparse import os from time import time -import argparse + import numpy as np -from joblib import Memory -from sklearn.datasets import fetch_openml -from sklearn.datasets import get_data_home -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import RandomForestClassifier +from joblib import Memory +from sklearn.datasets import fetch_openml, get_data_home from sklearn.dummy import 
 from sklearn.dummy import DummyClassifier
-from sklearn.kernel_approximation import Nystroem
-from sklearn.kernel_approximation import RBFSampler
+from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
+from sklearn.kernel_approximation import Nystroem, RBFSampler
+from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import zero_one_loss
+from sklearn.neural_network import MLPClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.svm import LinearSVC
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.utils import check_array
-from sklearn.linear_model import LogisticRegression
-from sklearn.neural_network import MLPClassifier
 
 # Memoize the data extraction and memory map the resulting
 # train / test splits in readonly mode
diff --git a/benchmarks/bench_multilabel_metrics.py b/benchmarks/bench_multilabel_metrics.py
index 2a87b388e91a2..1b8449a24da51 100755
--- a/benchmarks/bench_multilabel_metrics.py
+++ b/benchmarks/bench_multilabel_metrics.py
@@ -3,26 +3,25 @@
 A comparison of multilabel target formats and metrics over them
 """
 
-from timeit import timeit
-from functools import partial
-import itertools
 import argparse
+import itertools
 import sys
+from functools import partial
+from timeit import timeit
 
 import matplotlib.pyplot as plt
-import scipy.sparse as sp
 import numpy as np
+import scipy.sparse as sp
 
 from sklearn.datasets import make_multilabel_classification
 from sklearn.metrics import (
-    f1_score,
     accuracy_score,
+    f1_score,
     hamming_loss,
     jaccard_similarity_score,
 )
 from sklearn.utils._testing import ignore_warnings
 
-
 METRICS = {
     "f1": partial(f1_score, average="micro"),
     "f1-by-sample": partial(f1_score, average="samples"),
diff --git a/benchmarks/bench_online_ocsvm.py b/benchmarks/bench_online_ocsvm.py
index c7eaefe082948..95a68336c66aa 100644
--- a/benchmarks/bench_online_ocsvm.py
+++ b/benchmarks/bench_online_ocsvm.py
@@ -15,21 +15,20 @@
 """
 
 from time import time
-import numpy as np
 
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
 from scipy.interpolate import interp1d
 
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype
-from sklearn.preprocessing import LabelBinarizer, StandardScaler
-from sklearn.pipeline import make_pipeline
-from sklearn.utils import shuffle
+from sklearn.datasets import fetch_covtype, fetch_kddcup99
 from sklearn.kernel_approximation import Nystroem
-from sklearn.svm import OneClassSVM
 from sklearn.linear_model import SGDOneClassSVM
-
-import matplotlib.pyplot as plt
-import matplotlib
+from sklearn.metrics import auc, roc_curve
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import LabelBinarizer, StandardScaler
+from sklearn.svm import OneClassSVM
+from sklearn.utils import shuffle
 
 font = {"weight": "normal", "size": 15}
 
diff --git a/benchmarks/bench_plot_fastkmeans.py b/benchmarks/bench_plot_fastkmeans.py
index edbf9412deca2..0d8fe261cb338 100644
--- a/benchmarks/bench_plot_fastkmeans.py
+++ b/benchmarks/bench_plot_fastkmeans.py
@@ -98,8 +98,8 @@ def compute_bench_2(chunks):
 
 
 if __name__ == "__main__":
-    from mpl_toolkits.mplot3d import axes3d  # noqa register the 3d projection
     import matplotlib.pyplot as plt
+    from mpl_toolkits.mplot3d import axes3d  # noqa register the 3d projection
 
     samples_range = np.linspace(50, 150, 5).astype(int)
     features_range = np.linspace(150, 50000, 5).astype(int)
--- a/benchmarks/bench_plot_incremental_pca.py +++ b/benchmarks/bench_plot_incremental_pca.py @@ -7,13 +7,15 @@ """ -import numpy as np import gc -from time import time from collections import defaultdict +from time import time + import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import fetch_lfw_people -from sklearn.decomposition import IncrementalPCA, PCA +from sklearn.decomposition import PCA, IncrementalPCA def plot_results(X, y, label): diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index 4373c70223976..37bb5c6ab0071 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -2,16 +2,15 @@ The input data is mostly low rank but is a fat infinite tail. """ -from collections import defaultdict import gc import sys +from collections import defaultdict from time import time import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram -from sklearn.linear_model import lasso_path from sklearn.datasets import make_regression +from sklearn.linear_model import lars_path, lars_path_gram, lasso_path def compute_bench(samples_range, features_range): @@ -81,8 +80,8 @@ def compute_bench(samples_range, features_range): if __name__ == "__main__": - from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection samples_range = np.linspace(10, 2000, 5).astype(int) features_range = np.linspace(10, 2000, 5).astype(int) diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py index c6e5541eda6f3..2d9cf2b08b71d 100644 --- a/benchmarks/bench_plot_neighbors.py +++ b/benchmarks/bench_plot_neighbors.py @@ -3,11 +3,11 @@ """ from time import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib import ticker -from sklearn import neighbors, datasets +from sklearn import datasets, neighbors def get_data(N, D, dataset="dense"): diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 745828955f9e8..8ac667f0e433d 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -6,28 +6,25 @@ # Anthony Di Franco (projected gradient, Python and NumPy port) # License: BSD 3 clause -from time import time +import numbers import sys import warnings -import numbers +from time import time -import numpy as np import matplotlib.pyplot as plt -from joblib import Memory +import numpy as np import pandas -from sklearn.utils._testing import ignore_warnings -from sklearn.feature_extraction.text import TfidfVectorizer +from joblib import Memory from sklearn.decomposition import NMF -from sklearn.decomposition._nmf import _initialize_nmf -from sklearn.decomposition._nmf import _beta_divergence -from sklearn.decomposition._nmf import _check_init +from sklearn.decomposition._nmf import _beta_divergence, _check_init, _initialize_nmf from sklearn.exceptions import ConvergenceWarning -from sklearn.utils.extmath import safe_sparse_dot, squared_norm +from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.utils import check_array +from sklearn.utils._testing import ignore_warnings +from sklearn.utils.extmath import safe_sparse_dot, squared_norm from sklearn.utils.validation import check_is_fitted, check_non_negative - mem = Memory(cachedir=".", verbose=0) ################### diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index 
4325e1fb17f3c..af94d38061ac7 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -9,8 +9,8 @@ import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp from sklearn.datasets import make_sparse_coded_signal +from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp def compute_bench(samples_range, features_range): diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py index a41e3fab20589..ca12972f9be6c 100644 --- a/benchmarks/bench_plot_parallel_pairwise.py +++ b/benchmarks/bench_plot_parallel_pairwise.py @@ -4,9 +4,8 @@ import matplotlib.pyplot as plt +from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels from sklearn.utils import check_random_state -from sklearn.metrics.pairwise import pairwise_distances -from sklearn.metrics.pairwise import pairwise_kernels def plot(func): diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py index b21589263a49f..e48de2881326e 100644 --- a/benchmarks/bench_plot_polynomial_kernel_approximation.py +++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py @@ -41,22 +41,22 @@ # Author: Daniel Lopez-Sanchez # License: BSD 3 clause -# Load data manipulation functions -from sklearn.datasets import load_digits -from sklearn.model_selection import train_test_split +# Will use this for timing results +from time import time # Some common libraries import matplotlib.pyplot as plt import numpy as np -# Will use this for timing results -from time import time - -# Import SVM classifiers and feature map approximation algorithms -from sklearn.svm import LinearSVC, SVC +# Load data manipulation functions +from sklearn.datasets import load_digits from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch +from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline +# Import SVM classifiers and feature map approximation algorithms +from sklearn.svm import SVC, LinearSVC + # Split data in train and test sets X, y = load_digits()["data"], load_digits()["target"] X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7) diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py index c7d67fa2a545d..018558ec1d833 100644 --- a/benchmarks/bench_plot_randomized_svd.py +++ b/benchmarks/bench_plot_randomized_svd.py @@ -65,28 +65,29 @@ # Author: Giorgio Patrini -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - import gc +import os.path import pickle -from time import time from collections import defaultdict -import os.path +from time import time + +import matplotlib.pyplot as plt +import numpy as np +import scipy as sp -from sklearn.utils._arpack import _init_arpack_v0 -from sklearn.utils import gen_batches -from sklearn.utils.validation import check_random_state -from sklearn.utils.extmath import randomized_svd -from sklearn.datasets import make_low_rank_matrix, make_sparse_uncorrelated from sklearn.datasets import ( - fetch_lfw_people, - fetch_openml, fetch_20newsgroups_vectorized, + fetch_lfw_people, fetch_olivetti_faces, + fetch_openml, fetch_rcv1, + make_low_rank_matrix, + make_sparse_uncorrelated, ) +from sklearn.utils import gen_batches +from sklearn.utils._arpack import _init_arpack_v0 +from sklearn.utils.extmath import randomized_svd +from sklearn.utils.validation import check_random_state try: import fbpca diff --git 
a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index 52d22f6a9c8a0..40f91b57f9e23 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -3,13 +3,14 @@ The data is mostly low rank but is a fat infinite tail. """ import gc -from time import time -import numpy as np from collections import defaultdict +from time import time +import numpy as np from scipy.linalg import svd -from sklearn.utils.extmath import randomized_svd + from sklearn.datasets import make_low_rank_matrix +from sklearn.utils.extmath import randomized_svd def compute_bench(samples_range, features_range, n_iter=3, rank=50): @@ -53,8 +54,8 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50): if __name__ == "__main__": - from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection samples_range = np.linspace(2, 1000, 4).astype(int) features_range = np.linspace(2, 1000, 4).astype(int) diff --git a/benchmarks/bench_plot_ward.py b/benchmarks/bench_plot_ward.py index 696e833eede20..fe5cee201dff4 100644 --- a/benchmarks/bench_plot_ward.py +++ b/benchmarks/bench_plot_ward.py @@ -4,9 +4,9 @@ import time +import matplotlib.pyplot as plt import numpy as np from scipy.cluster import hierarchy -import matplotlib.pyplot as plt from sklearn.cluster import AgglomerativeClustering diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index 89a4550944f3f..bd8c62ecba484 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -6,19 +6,19 @@ Benchmarks for random projections. """ +import collections import gc -import sys import optparse +import sys from datetime import datetime -import collections import numpy as np import scipy.sparse as sp from sklearn import clone from sklearn.random_projection import ( - SparseRandomProjection, GaussianRandomProjection, + SparseRandomProjection, johnson_lindenstrauss_min_dim, ) diff --git a/benchmarks/bench_rcv1_logreg_convergence.py b/benchmarks/bench_rcv1_logreg_convergence.py index e8fce1c414abf..a6a5e3b4fd450 100644 --- a/benchmarks/bench_rcv1_logreg_convergence.py +++ b/benchmarks/bench_rcv1_logreg_convergence.py @@ -3,14 +3,15 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt -from joblib import Memory -import numpy as np import gc import time -from sklearn.linear_model import LogisticRegression, SGDClassifier +import matplotlib.pyplot as plt +import numpy as np + +from joblib import Memory from sklearn.datasets import fetch_rcv1 +from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.linear_model._sag import get_auto_step_size try: diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py index 581f7e3881e9e..997dde6bbb08d 100644 --- a/benchmarks/bench_saga.py +++ b/benchmarks/bench_saga.py @@ -4,25 +4,25 @@ in using multinomial logistic regression in term of learning time. 
""" import json -import time import os +import time -from joblib import Parallel -from sklearn.utils.fixes import delayed import matplotlib.pyplot as plt import numpy as np +from joblib import Parallel from sklearn.datasets import ( + fetch_20newsgroups_vectorized, fetch_rcv1, - load_iris, load_digits, - fetch_20newsgroups_vectorized, + load_iris, ) from sklearn.linear_model import LogisticRegression from sklearn.metrics import log_loss from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelBinarizer, LabelEncoder from sklearn.utils.extmath import safe_sparse_dot, softmax +from sklearn.utils.fixes import delayed def fit_single( diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 4f1041a6d1022..f4e37c293a5d8 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -3,14 +3,14 @@ """ import gc -import sys +import operator import optparse +import random +import sys from datetime import datetime -import operator import matplotlib.pyplot as plt import numpy as np -import random from sklearn.utils.random import sample_without_replacement diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py index 47dd9e9fc758b..4b1b902795feb 100644 --- a/benchmarks/bench_sgd_regression.py +++ b/benchmarks/bench_sgd_regression.py @@ -1,16 +1,15 @@ # Author: Peter Prettenhofer # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt - import gc - from time import time -from sklearn.linear_model import Ridge, SGDRegressor, ElasticNet -from sklearn.metrics import mean_squared_error +import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_regression +from sklearn.linear_model import ElasticNet, Ridge, SGDRegressor +from sklearn.metrics import mean_squared_error """ Benchmark for SGD regression diff --git a/benchmarks/bench_sparsify.py b/benchmarks/bench_sparsify.py index f1aa482b8b732..1832ca40c6ddb 100644 --- a/benchmarks/bench_sparsify.py +++ b/benchmarks/bench_sparsify.py @@ -43,8 +43,9 @@ 60 300 381409 1271.4 97.1 clf.predict(X_test_sparse) """ -from scipy.sparse import csr_matrix import numpy as np +from scipy.sparse import csr_matrix + from sklearn.linear_model import SGDRegressor from sklearn.metrics import r2_score diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py index 4f40e87f74e14..50431f6908a0e 100644 --- a/benchmarks/bench_text_vectorizers.py +++ b/benchmarks/bench_text_vectorizers.py @@ -8,8 +8,8 @@ * psutil (optional, but recommended) """ -import timeit import itertools +import timeit import numpy as np import pandas as pd @@ -18,8 +18,8 @@ from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import ( CountVectorizer, - TfidfVectorizer, HashingVectorizer, + TfidfVectorizer, ) n_repeat = 3 diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py index 1809cb7c5e9c0..a7d46aa729003 100644 --- a/benchmarks/bench_tree.py +++ b/benchmarks/bench_tree.py @@ -13,11 +13,12 @@ training set, classify a sample and plot the time taken as a function of the number of dimensions. 
""" -import numpy as np -import matplotlib.pyplot as plt import gc from datetime import datetime +import matplotlib.pyplot as plt +import numpy as np + # to store the results scikit_classifier_results = [] scikit_regressor_results = [] diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index aa1a07a67ef44..2ed1db8846eab 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -7,18 +7,19 @@ # License: BSD 3 clause +import argparse +import json import os import os.path as op from time import time + import numpy as np -import json -import argparse -from joblib import Memory +from joblib import Memory from sklearn.datasets import fetch_openml +from sklearn.decomposition import PCA from sklearn.manifold import TSNE from sklearn.neighbors import NearestNeighbors -from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils import shuffle as _shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads diff --git a/benchmarks/plot_tsne_mnist.py b/benchmarks/plot_tsne_mnist.py index d32e3dd769d6a..fff71eed0a26c 100644 --- a/benchmarks/plot_tsne_mnist.py +++ b/benchmarks/plot_tsne_mnist.py @@ -1,9 +1,8 @@ -import matplotlib.pyplot as plt -import numpy as np -import os.path as op - import argparse +import os.path as op +import matplotlib.pyplot as plt +import numpy as np LOG_DIR = "mnist_tsne_output" diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index 68e198f8bdb38..8d335e85e502a 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -4,7 +4,6 @@ import json import re import sys - from distutils.version import LooseVersion from urllib.request import urlopen diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index dfeb1b92e954f..d6ebfdd771c5f 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -6,12 +6,13 @@ The table should be updated for each new inclusion in the teams. Generating the table requires admin rights. """ -import sys -import requests import getpass +import sys import time -from pathlib import Path from os import path +from pathlib import Path + +import requests print("user:", file=sys.stderr) user = input() diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index ef9bd77254fb5..2549da5cdbf82 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -1,8 +1,9 @@ """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config.""" -import yaml -from pathlib import Path import sys +from pathlib import Path + +import yaml gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml" with gh_wheel_path.open("r") as f: diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py index bbc941d8f25f7..89db98bff8b5d 100644 --- a/build_tools/github/vendor.py +++ b/build_tools/github/vendor.py @@ -11,7 +11,6 @@ import sys import textwrap - TARGET_FOLDER = op.join("sklearn", ".libs") DISTRIBUTOR_INIT = op.join("sklearn", "_distributor_init.py") VCOMP140_SRC_PATH = "C:\\Windows\\System32\\vcomp140.dll" diff --git a/doc/conf.py b/doc/conf.py index 8276e8522f133..a60f411568fdb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,14 +10,15 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import sys import os -import warnings import re +import sys +import warnings from datetime import datetime -from packaging.version import parse -from pathlib import Path from io import StringIO +from pathlib import Path + +from packaging.version import parse # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory @@ -25,8 +26,8 @@ # absolute, like shown here. sys.path.insert(0, os.path.abspath("sphinxext")) -from github_link import make_linkcode_resolve import sphinx_gallery +from github_link import make_linkcode_resolve from sphinx_gallery.sorting import ExampleTitleSortKey # -- General configuration --------------------------------------------------- @@ -413,8 +414,8 @@ def __call__(self, filename): # enable experimental module so that experimental estimators can be # discovered properly by sphinx -from sklearn.experimental import enable_iterative_imputer # noqa from sklearn.experimental import enable_halving_search_cv # noqa +from sklearn.experimental import enable_iterative_imputer # noqa def make_carousel_thumbs(app, exception): diff --git a/doc/conftest.py b/doc/conftest.py index 10253efeabf98..4edde566bf50c 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -1,16 +1,14 @@ import os -from os.path import exists -from os.path import join -from os import environ import warnings +from os import environ +from os.path import exists, join -from sklearn.utils import IS_PYPY -from sklearn.utils._testing import SkipTest -from sklearn.utils._testing import check_skip_network -from sklearn.utils.fixes import parse_version from sklearn.datasets import get_data_home from sklearn.datasets._base import _pkl_filepath from sklearn.datasets._twenty_newsgroups import CACHE_NAME +from sklearn.utils import IS_PYPY +from sklearn.utils._testing import SkipTest, check_skip_network +from sklearn.utils.fixes import parse_version def setup_labeled_faces(): diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py index bf51644b67116..ac4545e2c8f14 100755 --- a/doc/sphinxext/allow_nan_estimators.py +++ b/doc/sphinxext/allow_nan_estimators.py @@ -1,15 +1,12 @@ -from sklearn.utils import all_estimators -from sklearn.compose import ColumnTransformer -from sklearn.pipeline import FeatureUnion -from sklearn.decomposition import SparseCoder -from sklearn.utils.estimator_checks import _construct_instance -from sklearn.utils._testing import SkipTest -from docutils import nodes -import warnings from contextlib import suppress +from docutils import nodes from docutils.parsers.rst import Directive +from sklearn.utils import all_estimators +from sklearn.utils._testing import SkipTest +from sklearn.utils.estimator_checks import _construct_instance + class AllowNanEstimators(Directive): @staticmethod diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py index f851a12ec69ea..48e54d3fc785c 100644 --- a/doc/sphinxext/doi_role.py +++ b/doc/sphinxext/doi_role.py @@ -16,7 +16,6 @@ """ from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py index 3992d814b825e..d3e43c8ed0f5e 100644 --- a/doc/sphinxext/github_link.py +++ b/doc/sphinxext/github_link.py @@ -1,9 +1,9 @@ -from operator import attrgetter import inspect -import subprocess import os +import subprocess import sys from functools import partial +from operator import attrgetter REVISION_CMD = "git rev-parse --short HEAD" diff --git 
a/doc/tutorial/machine_learning_map/parse_path.py b/doc/tutorial/machine_learning_map/parse_path.py index b1c68cec7f76b..4015c88fe8089 100644 --- a/doc/tutorial/machine_learning_map/parse_path.py +++ b/doc/tutorial/machine_learning_map/parse_path.py @@ -6,86 +6,105 @@ """ try: - from pyparsing import (ParserElement, Literal, Word, CaselessLiteral, - Optional, Combine, Forward, ZeroOrMore, nums, oneOf, Group, ParseException, OneOrMore) + from pyparsing import ( + CaselessLiteral, + Combine, + Group, + Literal, + OneOrMore, + Optional, + ParseException, + Word, + ZeroOrMore, + nums, + oneOf, + ) except ImportError: import sys + sys.exit("pyparsing is required") - - -#ParserElement.enablePackrat() + + +# ParserElement.enablePackrat() + def Command(char): - """ Case insensitive but case preserving""" + """Case insensitive but case preserving""" return CaselessPreservingLiteral(char) - + + def Arguments(token): return Group(token) - - + + class CaselessPreservingLiteral(CaselessLiteral): - """ Like CaselessLiteral, but returns the match as found - instead of as defined. + """Like CaselessLiteral, but returns the match as found + instead of as defined. """ - def __init__( self, matchString ): + + def __init__(self, matchString): super().__init__(matchString.upper()) self.name = "'%s'" % matchString self.errmsg = "Expected " + self.name self.myException.msg = self.errmsg - def parseImpl( self, instring, loc, doActions=True ): - test = instring[ loc:loc+self.matchLen ] + def parseImpl(self, instring, loc, doActions=True): + test = instring[loc : loc + self.matchLen] if test.upper() == self.match: - return loc+self.matchLen, test - #~ raise ParseException( instring, loc, self.errmsg ) + return loc + self.matchLen, test + # ~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc exc.pstr = instring - raise exc - + raise exc + + def Sequence(token): - """ A sequence of the token""" - return OneOrMore(token+maybeComma) + """A sequence of the token""" + return OneOrMore(token + maybeComma) + digit_sequence = Word(nums) sign = oneOf("+ -") + def convertToFloat(s, loc, toks): try: return float(toks[0]) except BaseException as e: raise ParseException(loc, "invalid float format %s" % toks[0]) from e -exponent = CaselessLiteral("e")+Optional(sign)+Word(nums) -#note that almost all these fields are optional, -#and this can match almost anything. We rely on Pythons built-in -#float() function to clear out invalid values - loosely matching like this -#speeds up parsing quite a lot +exponent = CaselessLiteral("e") + Optional(sign) + Word(nums) + +# note that almost all these fields are optional, +# and this can match almost anything. 
We rely on Pythons built-in +# float() function to clear out invalid values - loosely matching like this +# speeds up parsing quite a lot floatingPointConstant = Combine( - Optional(sign) + - Optional(Word(nums)) + - Optional(Literal(".") + Optional(Word(nums)))+ - Optional(exponent) + Optional(sign) + + Optional(Word(nums)) + + Optional(Literal(".") + Optional(Word(nums))) + + Optional(exponent) ) floatingPointConstant.setParseAction(convertToFloat) number = floatingPointConstant -#same as FP constant but don't allow a - sign +# same as FP constant but don't allow a - sign nonnegativeNumber = Combine( - Optional(Word(nums)) + - Optional(Literal(".") + Optional(Word(nums)))+ - Optional(exponent) + Optional(Word(nums)) + + Optional(Literal(".") + Optional(Word(nums))) + + Optional(exponent) ) nonnegativeNumber.setParseAction(convertToFloat) coordinate = number -#comma or whitespace can separate values all over the place in SVG -maybeComma = Optional(Literal(',')).suppress() +# comma or whitespace can separate values all over the place in SVG +maybeComma = Optional(Literal(",")).suppress() coordinateSequence = Sequence(coordinate) @@ -95,31 +114,34 @@ def convertToFloat(s, loc, toks): coordinatePairPair = coordinatePair + maybeComma + coordinatePair coordinatePairPairSequence = Sequence(Group(coordinatePairPair)) -coordinatePairTriple = coordinatePair + maybeComma + coordinatePair + maybeComma + coordinatePair +coordinatePairTriple = ( + coordinatePair + maybeComma + coordinatePair + maybeComma + coordinatePair +) coordinatePairTripleSequence = Sequence(Group(coordinatePairTriple)) -#commands +# commands lineTo = Group(Command("L") + Arguments(coordinatePairSequence)) curve = Group(Command("C") + Arguments(coordinatePairSequence)) moveTo = Group(Command("M") + Arguments(coordinatePairSequence)) -closePath = Group(Command("Z")).setParseAction(lambda t: ('Z', (None,))) +closePath = Group(Command("Z")).setParseAction(lambda t: ("Z", (None,))) flag = oneOf("1 0").setParseAction(lambda t: bool(int((t[0])))) arcRadius = ( - nonnegativeNumber + maybeComma + #rx - nonnegativeNumber #ry + nonnegativeNumber + maybeComma + nonnegativeNumber # rx # ry ).setParseAction(tuple) arcFlags = (flag + maybeComma + flag).setParseAction(tuple) ellipticalArcArgument = Group( - arcRadius + maybeComma + #rx, ry - number + maybeComma +#rotation - arcFlags + #large-arc-flag, sweep-flag - coordinatePair #(x,y) + arcRadius + + maybeComma + + number # rx, ry + + maybeComma + + arcFlags # rotation + + coordinatePair # large-arc-flag, sweep-flag # (x,y) ) ellipticalArc = Group(Command("A") + Arguments(Sequence(ellipticalArcArgument))) @@ -130,63 +152,75 @@ def convertToFloat(s, loc, toks): smoothCurve = Group(Command("S") + Arguments(coordinatePairPairSequence)) -#curve = Group(Command("C") + Arguments(coordinatePairTripleSequence)) +# curve = Group(Command("C") + Arguments(coordinatePairTripleSequence)) horizontalLine = Group(Command("H") + Arguments(coordinateSequence)) verticalLine = Group(Command("V") + Arguments(coordinateSequence)) drawToCommand = ( - lineTo | moveTo | closePath | ellipticalArc | smoothQuadraticBezierCurveto | - quadraticBezierCurveto | smoothCurve | curve | horizontalLine | verticalLine - ) + lineTo + | moveTo + | closePath + | ellipticalArc + | smoothQuadraticBezierCurveto + | quadraticBezierCurveto + | smoothCurve + | curve + | horizontalLine + | verticalLine +) -#~ number.debug = True +# ~ number.debug = True moveToDrawToCommands = moveTo + ZeroOrMore(drawToCommand) path = 
ZeroOrMore(moveToDrawToCommands) path.keepTabs = True + def get_points(d): commands = path.parseString(d) points = [] currentset = None for command in commands: - if command[0] == 'M' or command[0] == 'm': + if command[0] == "M" or command[0] == "m": currentset = [] points.append(currentset) currentset.append(command[1][-1]) - elif command[0] == 'L' or command[0] == 'l': + elif command[0] == "L" or command[0] == "l": currentset.extend(command[1]) - elif command[0] == 'C' or command[0] == 'c': + elif command[0] == "C" or command[0] == "c": currentset.extend(command[1]) return points + if __name__ == "__main__": - s = ("M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 " - "L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z " - "M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 " - "L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 " - "L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 " - "L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 " - "L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 " - "L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 " - "L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 " - "L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 " - "L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 " - "L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 " - "L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 " - "L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z " - "M 313.89928,672.18665 L 316.08679,669.37414 L 320.61806,671.7179 " - "L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 " - "L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 " - "L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 " - "L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 " - "L 347.02443,695.31176 L 348.89944,699.53053 L 352.80571,702.03054 " - "L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 " - "L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 " - "L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 " - "L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 " - "L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 " - "L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 " - "L 329.99311,687.18672 z ") + s = ( + "M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 " + "L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z " + "M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 " + "L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 " + "L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 " + "L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 " + "L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 " + "L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 " + "L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 " + "L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 " + "L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 " + "L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 " + "L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 " + "L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z " + "M 313.89928,672.18665 L 316.08679,669.37414 L 
320.61806,671.7179 " + "L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 " + "L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 " + "L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 " + "L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 " + "L 347.02443,695.31176 L 348.89944,699.53053 L 352.80571,702.03054 " + "L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 " + "L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 " + "L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 " + "L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 " + "L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 " + "L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 " + "L 329.99311,687.18672 z " + ) print(path.parseString(s)) diff --git a/doc/tutorial/machine_learning_map/pyparsing.py b/doc/tutorial/machine_learning_map/pyparsing.py index a0f4a66c7291e..94e20ff8ad3d2 100644 --- a/doc/tutorial/machine_learning_map/pyparsing.py +++ b/doc/tutorial/machine_learning_map/pyparsing.py @@ -23,8 +23,7 @@ # # flake8: noqa -__doc__ = \ -""" +__doc__ = """ pyparsing module - Classes and methods to define and execute parsing grammars The pyparsing module is an alternative approach to creating and executing simple grammars, @@ -32,8 +31,8 @@ don't need to learn a new syntax for defining grammars or matching expressions - the parsing module provides a library of classes that you use to construct the grammar directly in Python. -Here is a program to parse "Hello, World!" (or any greeting of the form -C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements +Here is a program to parse "Hello, World!" (or any greeting of the form +C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements (L{'+'} operator gives L{And} expressions, strings are auto-converted to L{Literal} expressions):: @@ -65,18 +64,18 @@ class names, and the use of '+', '|' and '^' operators. __versionTime__ = "06 Mar 2017 02:06 UTC" __author__ = "Paul McGuire " -import string -from weakref import ref as wkref +import collections import copy -import sys -import warnings +import pprint import re import sre_constants -import collections -import pprint +import string +import sys import traceback import types +import warnings from datetime import datetime +from weakref import ref as wkref try: from _thread import RLock @@ -91,27 +90,114 @@ class names, and the use of '+', '|' and '^' operators. 
except ImportError: _OrderedDict = None -#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) +# ~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) __all__ = [ -'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', -'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', -'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', -'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', -'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', -'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', -'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', -'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', -'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', -'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', -'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', -'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', -'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', -'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', -'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', -'CloseMatch', 'tokenMap', 'pyparsing_common', + "And", + "CaselessKeyword", + "CaselessLiteral", + "CharsNotIn", + "Combine", + "Dict", + "Each", + "Empty", + "FollowedBy", + "Forward", + "GoToColumn", + "Group", + "Keyword", + "LineEnd", + "LineStart", + "Literal", + "MatchFirst", + "NoMatch", + "NotAny", + "OneOrMore", + "OnlyOnce", + "Optional", + "Or", + "ParseBaseException", + "ParseElementEnhance", + "ParseException", + "ParseExpression", + "ParseFatalException", + "ParseResults", + "ParseSyntaxException", + "ParserElement", + "QuotedString", + "RecursiveGrammarException", + "Regex", + "SkipTo", + "StringEnd", + "StringStart", + "Suppress", + "Token", + "TokenConverter", + "White", + "Word", + "WordEnd", + "WordStart", + "ZeroOrMore", + "alphanums", + "alphas", + "alphas8bit", + "anyCloseTag", + "anyOpenTag", + "cStyleComment", + "col", + "commaSeparatedList", + "commonHTMLEntity", + "countedArray", + "cppStyleComment", + "dblQuotedString", + "dblSlashComment", + "delimitedList", + "dictOf", + "downcaseTokens", + "empty", + "hexnums", + "htmlComment", + "javaStyleComment", + "line", + "lineEnd", + "lineStart", + "lineno", + "makeHTMLTags", + "makeXMLTags", + "matchOnlyAtCol", + "matchPreviousExpr", + "matchPreviousLiteral", + "nestedExpr", + "nullDebugAction", + "nums", + "oneOf", + "opAssoc", + "operatorPrecedence", + "printables", + "punc8bit", + "pythonStyleComment", + "quotedString", + "removeQuotes", + "replaceHTMLEntity", + "replaceWith", + "restOfLine", + "sglQuotedString", + "srange", + "stringEnd", + "stringStart", + "traceParseAction", + "unicodeString", + "upcaseTokens", + "withAttribute", + "indentedBlock", + "originalTextFor", + "ungroup", + "infixNotation", + "locatedExpr", + "withClass", + "CloseMatch", + "tokenMap", + "pyparsing_common", ] 
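Note: the one-name-per-line rewrite of __all__ just above is not isort's doing, since isort touches only import statements. The layout matches black's style, presumably because the black pre-commit hook reformatted these previously unformatted tutorial files once isort modified them. Black's "magic trailing comma" then keeps such lists exploded. A sketch, assuming black's default line length:

    # With a trailing comma after the last element, black pins the list to
    # one element per line even though it would fit within the line limit:
    exploded = [
        "And",
        "Word",
    ]

    # Without a pre-existing trailing comma, black keeps a short list inline:
    collapsed = ["And", "Word"]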
system_version = tuple(sys.version_info)[:3] @@ -123,7 +209,19 @@ class names, and the use of '+', '|' and '^' operators. _ustr = str # build list of single arg builtins, that can be used as parse actions - singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] + singleArgBuiltins = [ + sum, + len, + sorted, + reversed, + list, + tuple, + set, + any, + all, + min, + max, + ] else: _MAX_INT = sys.maxint @@ -131,10 +229,10 @@ class names, and the use of '+', '|' and '^' operators. def _ustr(obj): """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. + str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It + then < returns the unicode object | encodes it with the default encoding | ... >. """ - if isinstance(obj,unicode): + if isinstance(obj, unicode): return obj try: @@ -144,47 +242,53 @@ def _ustr(obj): except UnicodeEncodeError: # Else encode it - ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') - xmlcharref = Regex(r'&#\d+;') - xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) + ret = unicode(obj).encode(sys.getdefaultencoding(), "xmlcharrefreplace") + xmlcharref = Regex(r"&#\d+;") + xmlcharref.setParseAction(lambda t: "\\u" + hex(int(t[0][2:-1]))[2:]) return xmlcharref.transformString(ret) # build list of single arg builtins, tolerant of Python version, that can be used as parse actions singleArgBuiltins = [] import __builtin__ + for fname in "sum len sorted reversed list tuple set any all min max".split(): try: - singleArgBuiltins.append(getattr(__builtin__,fname)) + singleArgBuiltins.append(getattr(__builtin__, fname)) except AttributeError: continue - + _generatorType = type((y for y in range(1))) - + + def _xml_escape(data): """Escape &, <, >, ", ', etc. 
in a string of data.""" # ampersand must be replaced first - from_symbols = '&><"\'' - to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) - for from_,to_ in zip(from_symbols, to_symbols): + from_symbols = "&><\"'" + to_symbols = ("&" + s + ";" for s in "amp gt lt quot apos".split()) + for from_, to_ in zip(from_symbols, to_symbols): data = data.replace(from_, to_) return data + class _Constants(object): pass -alphas = string.ascii_uppercase + string.ascii_lowercase -nums = "0123456789" -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -_bslash = chr(92) + +alphas = string.ascii_uppercase + string.ascii_lowercase +nums = "0123456789" +hexnums = nums + "ABCDEFabcdef" +alphanums = alphas + nums +_bslash = chr(92) printables = "".join(c for c in string.printable if c not in string.whitespace) + class ParseBaseException(Exception): """base exception class for all parsing runtime exceptions""" + # Performance tuning: we construct a *lot* of these, so keep this # constructor as small and fast as possible - def __init__( self, pstr, loc=0, msg=None, elem=None ): + def __init__(self, pstr, loc=0, msg=None, elem=None): self.loc = loc if msg is None: self.msg = pstr @@ -198,44 +302,53 @@ def __init__( self, pstr, loc=0, msg=None, elem=None ): @classmethod def _from_exception(cls, pe): """ - internal factory method to simplify creating one type of ParseException + internal factory method to simplify creating one type of ParseException from another - avoids having __init__ signature conflicts among subclasses """ return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) - def __getattr__( self, aname ): + def __getattr__(self, aname): """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text """ - if( aname == "lineno" ): - return lineno( self.loc, self.pstr ) - elif( aname in ("col", "column") ): - return col( self.loc, self.pstr ) - elif( aname == "line" ): - return line( self.loc, self.pstr ) + if aname == "lineno": + return lineno(self.loc, self.pstr) + elif aname in ("col", "column"): + return col(self.loc, self.pstr) + elif aname == "line": + return line(self.loc, self.pstr) else: raise AttributeError(aname) - def __str__( self ): - return "%s (at char %d), (line:%d, col:%d)" % \ - ( self.msg, self.loc, self.lineno, self.column ) - def __repr__( self ): + def __str__(self): + return "%s (at char %d), (line:%d, col:%d)" % ( + self.msg, + self.loc, + self.lineno, + self.column, + ) + + def __repr__(self): return _ustr(self) - def markInputline( self, markerString = ">!<" ): + + def markInputline(self, markerString=">!<"): """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. + the location of the exception with a special symbol. 
""" line_str = self.line line_column = self.column - 1 if markerString: - line_str = "".join((line_str[:line_column], - markerString, line_str[line_column:])) + line_str = "".join( + (line_str[:line_column], markerString, line_str[line_column:]) + ) return line_str.strip() + def __dir__(self): return "lineno col line".split() + dir(type(self)) + class ParseException(ParseBaseException): """ Exception thrown when parse expressions don't match class; @@ -243,61 +356,74 @@ class ParseException(ParseBaseException): - lineno - returns the line number of the exception text - col - returns the column number of the exception text - line - returns the line containing the exception text - + Example:: try: Word(nums).setName("integer").parseString("ABC") except ParseException as pe: print(pe) print("column: {}".format(pe.col)) - + prints:: Expected integer (at char 0), (line:1, col:1) column: 1 """ + pass + class ParseFatalException(ParseBaseException): """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" + is found; stops all parsing immediately""" + pass + class ParseSyntaxException(ParseFatalException): """just like L{ParseFatalException}, but thrown internally when an - L{ErrorStop} ('-' operator) indicates that parsing is to stop - immediately because an unbacktrackable syntax error has been found""" + L{ErrorStop} ('-' operator) indicates that parsing is to stop + immediately because an unbacktrackable syntax error has been found""" + pass -#~ class ReparseException(ParseBaseException): - #~ """Experimental class - parse actions can raise this exception to cause - #~ pyparsing to reparse the input string: - #~ - with a modified input string, and/or - #~ - with a modified start location - #~ Set the values of the ReparseException in the constructor, and raise the - #~ exception in a parse action to cause pyparsing to use the new string/location. - #~ Setting the values as None causes no change to be made. - #~ """ - #~ def __init_( self, newstring, restartLoc ): - #~ self.newParseText = newstring - #~ self.reparseLoc = restartLoc + +# ~ class ReparseException(ParseBaseException): +# ~ """Experimental class - parse actions can raise this exception to cause +# ~ pyparsing to reparse the input string: +# ~ - with a modified input string, and/or +# ~ - with a modified start location +# ~ Set the values of the ReparseException in the constructor, and raise the +# ~ exception in a parse action to cause pyparsing to use the new string/location. +# ~ Setting the values as None causes no change to be made. 
+# ~ """ +# ~ def __init_( self, newstring, restartLoc ): +# ~ self.newParseText = newstring +# ~ self.reparseLoc = restartLoc + class RecursiveGrammarException(Exception): """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive""" - def __init__( self, parseElementList ): + + def __init__(self, parseElementList): self.parseElementTrace = parseElementList - def __str__( self ): + def __str__(self): return "RecursiveGrammarException: %s" % self.parseElementTrace + class _ParseResultsWithOffset(object): - def __init__(self,p1,p2): - self.tup = (p1,p2) - def __getitem__(self,i): + def __init__(self, p1, p2): + self.tup = (p1, p2) + + def __getitem__(self, i): return self.tup[i] + def __repr__(self): return repr(self.tup[0]) - def setOffset(self,i): - self.tup = (self.tup[0],i) + + def setOffset(self, i): + self.tup = (self.tup[0], i) + class ParseResults(object): """ @@ -308,8 +434,8 @@ class ParseResults(object): Example:: integer = Word(nums) - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' + date_str = (integer.setResultsName("year") + '/' + + integer.setResultsName("month") + '/' + integer.setResultsName("day")) # equivalent form: # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") @@ -338,7 +464,8 @@ def test(s, fn=repr): - month: 12 - year: 1999 """ - def __new__(cls, toklist=None, name=None, asList=True, modal=True ): + + def __new__(cls, toklist=None, name=None, asList=True, modal=True): if isinstance(toklist, cls): return toklist retobj = object.__new__(cls) @@ -347,7 +474,9 @@ def __new__(cls, toklist=None, name=None, asList=True, modal=True ): # Performance tuning: we construct a *lot* of these, so keep this # constructor as small and fast as possible - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ): + def __init__( + self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance + ): if self.__doinit: self.__doinit = False self.__name = None @@ -368,89 +497,109 @@ def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance if name is not None and name: if not modal: self.__accumNames[name] = 0 - if isinstance(name,int): - name = _ustr(name) # will always return a str, but use _ustr for consistency + if isinstance(name, int): + name = _ustr( + name + ) # will always return a str, but use _ustr for consistency self.__name = name - if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): - if isinstance(toklist,basestring): - toklist = [ toklist ] + if not ( + isinstance(toklist, (type(None), basestring, list)) + and toklist in (None, "", []) + ): + if isinstance(toklist, basestring): + toklist = [toklist] if asList: - if isinstance(toklist,ParseResults): - self[name] = _ParseResultsWithOffset(toklist.copy(),0) + if isinstance(toklist, ParseResults): + self[name] = _ParseResultsWithOffset(toklist.copy(), 0) else: - self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) + self[name] = _ParseResultsWithOffset( + ParseResults(toklist[0]), 0 + ) self[name].__name = name else: try: self[name] = toklist[0] - except (KeyError,TypeError,IndexError): + except (KeyError, TypeError, IndexError): self[name] = toklist - def __getitem__( self, i ): - if isinstance( i, (int,slice) ): + def __getitem__(self, i): + if isinstance(i, (int, slice)): return self.__toklist[i] else: if i not in self.__accumNames: return self.__tokdict[i][-1][0] else: - return ParseResults([ 
v[0] for v in self.__tokdict[i] ]) + return ParseResults([v[0] for v in self.__tokdict[i]]) - def __setitem__( self, k, v, isinstance=isinstance ): - if isinstance(v,_ParseResultsWithOffset): - self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] + def __setitem__(self, k, v, isinstance=isinstance): + if isinstance(v, _ParseResultsWithOffset): + self.__tokdict[k] = self.__tokdict.get(k, list()) + [v] sub = v[0] - elif isinstance(k,(int,slice)): + elif isinstance(k, (int, slice)): self.__toklist[k] = v sub = v else: - self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] + self.__tokdict[k] = self.__tokdict.get(k, list()) + [ + _ParseResultsWithOffset(v, 0) + ] sub = v - if isinstance(sub,ParseResults): + if isinstance(sub, ParseResults): sub.__parent = wkref(self) - def __delitem__( self, i ): - if isinstance(i,(int,slice)): - mylen = len( self.__toklist ) + def __delitem__(self, i): + if isinstance(i, (int, slice)): + mylen = len(self.__toklist) del self.__toklist[i] # convert int to slice if isinstance(i, int): if i < 0: i += mylen - i = slice(i, i+1) + i = slice(i, i + 1) # get removed indices removed = list(range(*i.indices(mylen))) removed.reverse() # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): + for name, occurrences in self.__tokdict.items(): for j in removed: for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) + occurrences[k] = _ParseResultsWithOffset( + value, position - (position > j) + ) else: del self.__tokdict[i] - def __contains__( self, k ): + def __contains__(self, k): return k in self.__tokdict - def __len__( self ): return len( self.__toklist ) - def __bool__(self): return ( not not self.__toklist ) + def __len__(self): + return len(self.__toklist) + + def __bool__(self): + return not not self.__toklist + __nonzero__ = __bool__ - def __iter__( self ): return iter( self.__toklist ) - def __reversed__( self ): return iter( self.__toklist[::-1] ) - def _iterkeys( self ): + + def __iter__(self): + return iter(self.__toklist) + + def __reversed__(self): + return iter(self.__toklist[::-1]) + + def _iterkeys(self): if hasattr(self.__tokdict, "iterkeys"): return self.__tokdict.iterkeys() else: return iter(self.__tokdict) - def _itervalues( self ): + def _itervalues(self): return (self[k] for k in self._iterkeys()) - - def _iteritems( self ): + + def _iteritems(self): return ((k, self[k]) for k in self._iterkeys()) if PY_3: - keys = _iterkeys + keys = _iterkeys """Returns an iterator of all named result keys (Python 3.x only).""" values = _itervalues @@ -469,32 +618,32 @@ def _iteritems( self ): iteritems = _iteritems """Returns an iterator of all named result key-value tuples (Python 2.x only).""" - def keys( self ): + def keys(self): """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" return list(self.iterkeys()) - def values( self ): + def values(self): """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" return list(self.itervalues()) - - def items( self ): + + def items(self): """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" return list(self.iteritems()) - def haskeys( self ): + def haskeys(self): """Since keys() returns an iterator, this method is helpful in bypassing - code that looks for the existence of any defined results names.""" + code that looks for the existence of any defined 
results names.""" return bool(self.__tokdict) - - def pop( self, *args, **kwargs): + + def pop(self, *args, **kwargs): """ Removes and returns item at specified index (default=C{last}). Supports both C{list} and C{dict} semantics for C{pop()}. If passed no argument or an integer argument, it will use C{list} semantics - and pop tokens from the list of parsed tokens. If passed a + and pop tokens from the list of parsed tokens. If passed a non-integer argument (most likely a string), it will use C{dict} - semantics and pop the corresponding value from any defined - results names. A second default return value argument is + semantics and pop the corresponding value from any defined + results names. A second default return value argument is supported, just as in C{dict.pop()}. Example:: @@ -522,14 +671,12 @@ def remove_LABEL(tokens): """ if not args: args = [-1] - for k,v in kwargs.items(): - if k == 'default': + for k, v in kwargs.items(): + if k == "default": args = (args[0], v) else: raise TypeError("pop() got an unexpected keyword argument '%s'" % k) - if (isinstance(args[0], int) or - len(args) == 1 or - args[0] in self): + if isinstance(args[0], int) or len(args) == 1 or args[0] in self: index = args[0] ret = self[index] del self[index] @@ -545,10 +692,10 @@ def get(self, key, defaultValue=None): C{defaultValue} is specified. Similar to C{dict.get()}. - + Example:: integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") result = date_str.parseString("1999/12/31") print(result.get("year")) # -> '1999' @@ -560,10 +707,10 @@ def get(self, key, defaultValue=None): else: return defaultValue - def insert( self, index, insStr ): + def insert(self, index, insStr): """ Inserts new element at location index in the list of parsed tokens. - + Similar to C{list.insert()}. Example:: @@ -576,17 +723,19 @@ def insert_locn(locn, tokens): """ self.__toklist.insert(index, insStr) # fixup indices in token dictionary - for name,occurrences in self.__tokdict.items(): + for name, occurrences in self.__tokdict.items(): for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) + occurrences[k] = _ParseResultsWithOffset( + value, position + (position > index) + ) - def append( self, item ): + def append(self, item): """ Add single element to end of ParseResults list of elements. Example:: print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - + # use a parse action to compute the sum of the parsed integers, and add it to the end def append_sum(tokens): tokens.append(sum(map(int, tokens))) @@ -594,13 +743,13 @@ def append_sum(tokens): """ self.__toklist.append(item) - def extend( self, itemseq ): + def extend(self, itemseq): """ Add sequence of elements to end of ParseResults list of elements. Example:: patt = OneOrMore(Word(alphas)) - + # use a parse action to append the reverse of the matched strings, to make a palindrome def make_palindrome(tokens): tokens.extend(reversed([t[::-1] for t in tokens])) @@ -612,74 +761,84 @@ def make_palindrome(tokens): else: self.__toklist.extend(itemseq) - def clear( self ): + def clear(self): """ Clear all elements and results names. 
""" del self.__toklist[:] self.__tokdict.clear() - def __getattr__( self, name ): + def __getattr__(self, name): try: return self[name] except KeyError: return "" - + if name in self.__tokdict: if name not in self.__accumNames: return self.__tokdict[name][-1][0] else: - return ParseResults([ v[0] for v in self.__tokdict[name] ]) + return ParseResults([v[0] for v in self.__tokdict[name]]) else: return "" - def __add__( self, other ): + def __add__(self, other): ret = self.copy() ret += other return ret - def __iadd__( self, other ): + def __iadd__(self, other): if other.__tokdict: offset = len(self.__toklist) - addoffset = lambda a: offset if a<0 else a+offset + addoffset = lambda a: offset if a < 0 else a + offset otheritems = other.__tokdict.items() - otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) - for (k,vlist) in otheritems for v in vlist] - for k,v in otherdictitems: + otherdictitems = [ + (k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) + for (k, vlist) in otheritems + for v in vlist + ] + for k, v in otherdictitems: self[k] = v - if isinstance(v[0],ParseResults): + if isinstance(v[0], ParseResults): v[0].__parent = wkref(self) - + self.__toklist += other.__toklist - self.__accumNames.update( other.__accumNames ) + self.__accumNames.update(other.__accumNames) return self def __radd__(self, other): - if isinstance(other,int) and other == 0: + if isinstance(other, int) and other == 0: # useful for merging many ParseResults using sum() builtin return self.copy() else: # this may raise a TypeError - so be it return other + self - - def __repr__( self ): - return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) - def __str__( self ): - return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' + def __repr__(self): + return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict)) + + def __str__(self): + return ( + "[" + + ", ".join( + _ustr(i) if isinstance(i, ParseResults) else repr(i) + for i in self.__toklist + ) + + "]" + ) - def _asStringList( self, sep='' ): + def _asStringList(self, sep=""): out = [] for item in self.__toklist: if out and sep: out.append(sep) - if isinstance( item, ParseResults ): + if isinstance(item, ParseResults): out += item._asStringList() else: - out.append( _ustr(item) ) + out.append(_ustr(item)) return out - def asList( self ): + def asList(self): """ Returns the parse results as a nested list of matching tokens, all converted to strings. @@ -688,24 +847,27 @@ def asList( self ): result = patt.parseString("sldkj lsdkj sldkj") # even though the result prints in string-like form, it is actually a pyparsing ParseResults print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] - + # Use asList() to create an actual list result_list = result.asList() print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] """ - return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] + return [ + res.asList() if isinstance(res, ParseResults) else res + for res in self.__toklist + ] - def asDict( self ): + def asDict(self): """ Returns the named parse results as a nested dictionary. 
Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - + result = date_str.parseString('12/31/1999') print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) - + result_dict = result.asDict() print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} @@ -718,7 +880,7 @@ def asDict( self ): item_fn = self.items else: item_fn = self.iteritems - + def toItem(obj): if isinstance(obj, ParseResults): if obj.haskeys(): @@ -727,28 +889,29 @@ def toItem(obj): return [toItem(v) for v in obj] else: return obj - - return dict((k,toItem(v)) for k,v in item_fn()) - def copy( self ): + return dict((k, toItem(v)) for k, v in item_fn()) + + def copy(self): """ Returns a new copy of a C{ParseResults} object. """ - ret = ParseResults( self.__toklist ) + ret = ParseResults(self.__toklist) ret.__tokdict = self.__tokdict.copy() ret.__parent = self.__parent - ret.__accumNames.update( self.__accumNames ) + ret.__accumNames.update(self.__accumNames) ret.__name = self.__name return ret - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): + def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True): """ (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. """ nl = "\n" out = [] - namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() - for v in vlist) + namedItems = dict( + (v[1], k) for (k, vlist) in self.__tokdict.items() for v in vlist + ) nextLevelIndent = indent + " " # collapse out indents if formatting is not desired @@ -770,20 +933,28 @@ def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): else: selfTag = "ITEM" - out += [ nl, indent, "<", selfTag, ">" ] + out += [nl, indent, "<", selfTag, ">"] - for i,res in enumerate(self.__toklist): - if isinstance(res,ParseResults): + for i, res in enumerate(self.__toklist): + if isinstance(res, ParseResults): if i in namedItems: - out += [ res.asXML(namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] + out += [ + res.asXML( + namedItems[i], + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted, + ) + ] else: - out += [ res.asXML(None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] + out += [ + res.asXML( + None, + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted, + ) + ] else: # individual token, see if there is a name for it resTag = None @@ -795,34 +966,42 @@ def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): else: resTag = "ITEM" xmlBodyText = _xml_escape(_ustr(res)) - out += [ nl, nextLevelIndent, "<", resTag, ">", - xmlBodyText, - "" ] - - out += [ nl, indent, "" ] + out += [ + nl, + nextLevelIndent, + "<", + resTag, + ">", + xmlBodyText, + "", + ] + + out += [nl, indent, ""] return "".join(out) - def __lookup(self,sub): - for k,vlist in self.__tokdict.items(): - for v,loc in vlist: + def __lookup(self, sub): + for k, vlist in self.__tokdict.items(): + for v, loc in vlist: if sub is v: return k return None def getName(self): r""" - Returns the results name for this token expression. Useful when several + Returns the results name for this token expression. Useful when several different expressions might match at a particular location. 
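asDict() does the same for the named results, collapsing them into a nested dictionary:

from pyparsing import Word, nums

integer = Word(nums)
date_str = integer("year") + "/" + integer("month") + "/" + integer("day")
result = date_str.parseString("12/31/1999")
print(result.asDict())  # -> {'year': '12', 'month': '31', 'day': '1999'}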
Example:: integer = Word(nums) ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") house_number_expr = Suppress('#') + Word(nums, alphanums) - user_data = (Group(house_number_expr)("house_number") + user_data = (Group(house_number_expr)("house_number") | Group(ssn_expr)("ssn") | Group(integer)("age")) user_info = OneOrMore(user_data) - + result = user_info.parseString("22 111-22-3333 #221B") for item in result: print(item.getName(), ':', item[0]) @@ -839,14 +1018,16 @@ def getName(self): return par.__lookup(self) else: return None - elif (len(self) == 1 and - len(self.__tokdict) == 1 and - next(iter(self.__tokdict.values()))[0][1] in (0,-1)): + elif ( + len(self) == 1 + and len(self.__tokdict) == 1 + and next(iter(self.__tokdict.values()))[0][1] in (0, -1) + ): return next(iter(self.__tokdict.keys())) else: return None - def dump(self, indent='', depth=0, full=True): + def dump(self, indent="", depth=0, full=True): """ Diagnostic method for listing out the contents of a C{ParseResults}. Accepts an optional C{indent} argument so that this string can be embedded @@ -855,7 +1036,7 @@ def dump(self, indent='', depth=0, full=True): Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - + result = date_str.parseString('12/31/1999') print(result.dump()) prints:: @@ -865,36 +1046,56 @@ def dump(self, indent='', depth=0, full=True): - year: 12 """ out = [] - NL = '\n' - out.append( indent+_ustr(self.asList()) ) + NL = "\n" + out.append(indent + _ustr(self.asList())) if full: if self.haskeys(): - items = sorted((str(k), v) for k,v in self.items()) - for k,v in items: + items = sorted((str(k), v) for k, v in self.items()) + for k, v in items: if out: out.append(NL) - out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) - if isinstance(v,ParseResults): + out.append("%s%s- %s: " % (indent, (" " * depth), k)) + if isinstance(v, ParseResults): if v: - out.append( v.dump(indent,depth+1) ) + out.append(v.dump(indent, depth + 1)) else: out.append(_ustr(v)) else: out.append(repr(v)) - elif any(isinstance(vv,ParseResults) for vv in self): + elif any(isinstance(vv, ParseResults) for vv in self): v = self - for i,vv in enumerate(v): - if isinstance(vv,ParseResults): - out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) + for i, vv in enumerate(v): + if isinstance(vv, ParseResults): + out.append( + "\n%s%s[%d]:\n%s%s%s" + % ( + indent, + (" " * (depth)), + i, + indent, + (" " * (depth + 1)), + vv.dump(indent, depth + 1), + ) + ) else: - out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) - + out.append( + "\n%s%s[%d]:\n%s%s%s" + % ( + indent, + (" " * (depth)), + i, + indent, + (" " * (depth + 1)), + _ustr(vv), + ) + ) + return "".join(out) def pprint(self, *args, **kwargs): """ Pretty-printer for parsed results as a list, using the C{pprint} module. - Accepts additional positional or keyword args as defined for the + Accepts additional positional or keyword args as defined for the C{pprint.pprint} method. 
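dump() and pprint() are the two diagnostic printers documented here; a quick sketch of their use on the recurring date example:

from pyparsing import Word, nums

integer = Word(nums)
date_str = integer("year") + "/" + integer("month") + "/" + integer("day")
result = date_str.parseString("12/31/1999")
print(result.dump())  # list form plus one "- name: value" line per results name
result.pprint()       # pprint.pprint-style rendering of the token list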
(U{https://docs.python.org/3/library/pprint.html#pprint.pprint}) Example:: @@ -916,18 +1117,19 @@ def pprint(self, *args, **kwargs): # add support for pickle protocol def __getstate__(self): - return ( self.__toklist, - ( self.__tokdict.copy(), - self.__parent is not None and self.__parent() or None, - self.__accumNames, - self.__name ) ) - - def __setstate__(self,state): + return ( + self.__toklist, + ( + self.__tokdict.copy(), + self.__parent is not None and self.__parent() or None, + self.__accumNames, + self.__name, + ), + ) + + def __setstate__(self, state): self.__toklist = state[0] - (self.__tokdict, - par, - inAccumNames, - self.__name) = state[1] + (self.__tokdict, par, inAccumNames, self.__name) = state[1] self.__accumNames = {} self.__accumNames.update(inAccumNames) if par is not None: @@ -939,109 +1141,128 @@ def __getnewargs__(self): return self.__toklist, self.__name, self.__asList, self.__modal def __dir__(self): - return (dir(type(self)) + list(self.keys())) + return dir(type(self)) + list(self.keys()) + collections.MutableMapping.register(ParseResults) -def col (loc,strg): + +def col(loc, strg): """Returns current column within a string, counting newlines as line separators. - The first column is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ + The first column is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing C{}s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ s = strg - return 1 if 0} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return strg.count("\n",0,loc) + 1 - -def line( loc, strg ): - """Returns the line of text containing loc within a string, counting newlines as line separators. - """ + The first line is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing C{}s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. 
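The module-level helpers col(), lineno() and line() defined below turn a character offset into human-readable positions, e.g.:

from pyparsing import col, line, lineno

text = "abc\ndef ghi"
loc = text.index("ghi")   # character offset 8
print(lineno(loc, text))  # -> 2 (lines count from 1)
print(col(loc, text))     # -> 5 (columns count from 1)
print(line(loc, text))    # -> 'def ghi'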
+ """ + return strg.count("\n", 0, loc) + 1 + + +def line(loc, strg): + """Returns the line of text containing loc within a string, counting newlines as line separators.""" lastCR = strg.rfind("\n", 0, loc) nextCR = strg.find("\n", loc) if nextCR >= 0: - return strg[lastCR+1:nextCR] + return strg[lastCR + 1 : nextCR] else: - return strg[lastCR+1:] + return strg[lastCR + 1 :] + + +def _defaultStartDebugAction(instring, loc, expr): + print( + ( + "Match " + + _ustr(expr) + + " at loc " + + _ustr(loc) + + "(%d,%d)" % (lineno(loc, instring), col(loc, instring)) + ) + ) + -def _defaultStartDebugAction( instring, loc, expr ): - print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) +def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks): + print("Matched " + _ustr(expr) + " -> " + str(toks.asList())) -def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): - print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) -def _defaultExceptionDebugAction( instring, loc, expr, exc ): - print ("Exception raised:" + _ustr(exc)) +def _defaultExceptionDebugAction(instring, loc, expr, exc): + print("Exception raised:" + _ustr(exc)) + def nullDebugAction(*args): """'Do-nothing' debug action, to suppress debugging output during parsing.""" pass + # Only works on Python 3.x - nonlocal is toxic to Python 2 installs -#~ 'decorator to trim function calls to match the arity of the target' -#~ def _trim_arity(func, maxargs=3): - #~ if func in singleArgBuiltins: - #~ return lambda s,l,t: func(t) - #~ limit = 0 - #~ foundArity = False - #~ def wrapper(*args): - #~ nonlocal limit,foundArity - #~ while 1: - #~ try: - #~ ret = func(*args[limit:]) - #~ foundArity = True - #~ return ret - #~ except TypeError: - #~ if limit == maxargs or foundArity: - #~ raise - #~ limit += 1 - #~ continue - #~ return wrapper +# ~ 'decorator to trim function calls to match the arity of the target' +# ~ def _trim_arity(func, maxargs=3): +# ~ if func in singleArgBuiltins: +# ~ return lambda s,l,t: func(t) +# ~ limit = 0 +# ~ foundArity = False +# ~ def wrapper(*args): +# ~ nonlocal limit,foundArity +# ~ while 1: +# ~ try: +# ~ ret = func(*args[limit:]) +# ~ foundArity = True +# ~ return ret +# ~ except TypeError: +# ~ if limit == maxargs or foundArity: +# ~ raise +# ~ limit += 1 +# ~ continue +# ~ return wrapper # this version is Python 2.x-3.x cross-compatible -'decorator to trim function calls to match the arity of the target' +"decorator to trim function calls to match the arity of the target" + + def _trim_arity(func, maxargs=2): if func in singleArgBuiltins: - return lambda s,l,t: func(t) + return lambda s, l, t: func(t) limit = [0] foundArity = [False] - + def extract_stack(limit=0): offset = -2 - frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset] + frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset] return [(frame_summary.filename, frame_summary.lineno)] + def extract_tb(tb, limit=0): frames = traceback.extract_tb(tb, limit=limit) frame_summary = frames[-1] return [(frame_summary.filename, frame_summary.lineno)] - - # synthesize what would be returned by traceback.extract_stack at the call to + + # synthesize what would be returned by traceback.extract_stack at the call to # user's parse action 'func', so that we don't incur call penalty at parse time - + LINE_DIFF = 6 - # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND + # IF ANY CODE CHANGES, EVEN JUST 
COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! this_line = extract_stack(limit=2)[-1] - pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF) + pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF) def wrapper(*args): while 1: try: - ret = func(*args[limit[0]:]) + ret = func(*args[limit[0] :]) foundArity[0] = True return ret except TypeError: @@ -1064,28 +1285,29 @@ def wrapper(*args): # copy func name to wrapper for sensible debug output func_name = "" try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) + func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) except Exception: func_name = str(func) wrapper.__name__ = func_name return wrapper + class ParserElement(object): """Abstract base level parser element class.""" + DEFAULT_WHITE_CHARS = " \n\t\r" verbose_stacktrace = False @staticmethod - def setDefaultWhitespaceChars( chars ): + def setDefaultWhitespaceChars(chars): r""" Overrides the default whitespace chars Example:: # default whitespace chars are space, and newline OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] - + # change to just treat newline as significant ParserElement.setDefaultWhitespaceChars(" \t") OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] @@ -1096,84 +1318,84 @@ def setDefaultWhitespaceChars( chars ): def inlineLiteralsUsing(cls): """ Set class to be used for inclusion of string literals into a parser. - + Example:: # default literal class used is Literal integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] # change to Suppress ParserElement.inlineLiteralsUsing(Suppress) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] """ ParserElement._literalStringClass = cls - def __init__( self, savelist=False ): + def __init__(self, savelist=False): self.parseAction = list() self.failAction = None - #~ self.name = "" # don't define self.name, let subclasses try/except upcall + # ~ self.name = "" # don't define self.name, let subclasses try/except upcall self.strRepr = None self.resultsName = None self.saveAsList = savelist self.skipWhitespace = True self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS self.copyDefaultWhiteChars = True - self.mayReturnEmpty = False # used when checking for left-recursion + self.mayReturnEmpty = False # used when checking for left-recursion self.keepTabs = False self.ignoreExprs = list() self.debug = False self.streamlined = False - self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index + self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index self.errmsg = "" - self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) - self.debugActions = ( None, None, None ) #custom debug actions + self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) + self.debugActions = (None, None, None) # custom debug actions self.re = None - self.callPreparse = True # used to avoid redundant calls to 
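As the setDefaultWhitespaceChars() docstring above notes, restricting the default whitespace to spaces and tabs makes newlines significant for elements built afterwards:

from pyparsing import OneOrMore, ParserElement, Word, alphas

ParserElement.setDefaultWhitespaceChars(" \t")  # newline is no longer skipped
words = OneOrMore(Word(alphas))                 # built after the call, so it picks up the new default
print(words.parseString("abc def\nghi jkl"))    # -> ['abc', 'def']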
preParse + self.callPreparse = True # used to avoid redundant calls to preParse self.callDuringTry = False - def copy( self ): + def copy(self): """ Make a copy of this C{ParserElement}. Useful for defining different parse actions for the same parsing pattern, using copies of the original parse element. - + Example:: integer = Word(nums).setParseAction(lambda toks: int(toks[0])) integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - + print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) prints:: [5120, 100, 655360, 268435456] Equivalent form of C{expr.copy()} is just C{expr()}:: integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") """ - cpy = copy.copy( self ) + cpy = copy.copy(self) cpy.parseAction = self.parseAction[:] cpy.ignoreExprs = self.ignoreExprs[:] if self.copyDefaultWhiteChars: cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS return cpy - def setName( self, name ): + def setName(self, name): """ Define name for this expression, makes debugging and exception messages clearer. - + Example:: Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) """ self.name = name self.errmsg = "Expected " + self.name - if hasattr(self,"exception"): + if hasattr(self, "exception"): self.exception.msg = self.errmsg return self - def setResultsName( self, name, listAllMatches=False ): + def setResultsName(self, name, listAllMatches=False): """ Define name for referencing matching tokens as a nested attribute of the returned parse results. @@ -1182,12 +1404,12 @@ def setResultsName( self, name, listAllMatches=False ): integer, and reference it in multiple places with different names. You can also set results names using the abbreviated syntax, - C{expr("name")} in place of C{expr.setResultsName("name")} - + C{expr("name")} in place of C{expr.setResultsName("name")} - see L{I{__call__}<__call__>}. Example:: - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' + date_str = (integer.setResultsName("year") + '/' + + integer.setResultsName("month") + '/' + integer.setResultsName("day")) # equivalent form: @@ -1196,30 +1418,33 @@ def setResultsName( self, name, listAllMatches=False ): newself = self.copy() if name.endswith("*"): name = name[:-1] - listAllMatches=True + listAllMatches = True newself.resultsName = name newself.modalResults = not listAllMatches return newself - def setBreak(self,breakFlag = True): + def setBreak(self, breakFlag=True): """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set C{breakFlag} to True to enable, False to - disable. + about to be parsed. Set C{breakFlag} to True to enable, False to + disable. 
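The two spellings shown in the setResultsName() docstring are interchangeable; expr("name") is simply shorthand:

from pyparsing import Word, alphas, nums

userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")  # equivalent
print(userdata.parseString("houston 555-22-1111").asDict())
# -> {'name': 'houston', 'socsecno': '555-22-1111'}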
""" if breakFlag: _parseMethod = self._parse + def breaker(instring, loc, doActions=True, callPreParse=True): import pdb + pdb.set_trace() - return _parseMethod( instring, loc, doActions, callPreParse ) + return _parseMethod(instring, loc, doActions, callPreParse) + breaker._originalParseMethod = _parseMethod self._parse = breaker else: - if hasattr(self._parse,"_originalParseMethod"): + if hasattr(self._parse, "_originalParseMethod"): self._parse = self._parse._originalParseMethod return self - def setParseAction( self, *fns, **kwargs ): + def setParseAction(self, *fns, **kwargs): """ Define one or more actions to perform when successfully matching parse element definition. Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, @@ -1239,7 +1464,7 @@ def setParseAction( self, *fns, **kwargs ): on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. - + Example:: integer = Word(nums) date_str = integer + '/' + integer + '/' + integer @@ -1257,10 +1482,10 @@ def setParseAction( self, *fns, **kwargs ): self.callDuringTry = kwargs.get("callDuringTry", False) return self - def addParseAction( self, *fns, **kwargs ): + def addParseAction(self, *fns, **kwargs): """ Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}}. - + See examples in L{I{copy}}. """ self.parseAction += list(map(_trim_arity, list(fns))) @@ -1268,14 +1493,14 @@ def addParseAction( self, *fns, **kwargs ): return self def addCondition(self, *fns, **kwargs): - """Add a boolean predicate function to expression's list of parse actions. See - L{I{setParseAction}} for function call signatures. Unlike C{setParseAction}, + """Add a boolean predicate function to expression's list of parse actions. See + L{I{setParseAction}} for function call signatures. Unlike C{setParseAction}, functions passed to C{addCondition} need to return boolean success/fail of the condition. Optional keyword arguments: - message = define a custom message to be used in the raised exception - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException - + Example:: integer = Word(nums).setParseAction(lambda toks: int(toks[0])) year_int = integer.copy() @@ -1287,42 +1512,44 @@ def addCondition(self, *fns, **kwargs): msg = kwargs.get("message", "failed user-defined condition") exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException for fn in fns: - def pa(s,l,t): - if not bool(_trim_arity(fn)(s,l,t)): - raise exc_type(s,l,msg) + + def pa(s, l, t): + if not bool(_trim_arity(fn)(s, l, t)): + raise exc_type(s, l, msg) + self.parseAction.append(pa) self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) return self - def setFailAction( self, fn ): + def setFailAction(self, fn): """Define action to perform if parsing fails at this expression. - Fail action fn is a callable function that takes the arguments - C{fn(s,loc,expr,err)} where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown - The function returns no value. 
It may throw C{L{ParseFatalException}} - if it is desired to stop parsing immediately.""" + Fail action fn is a callable function that takes the arguments + C{fn(s,loc,expr,err)} where: + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + The function returns no value. It may throw C{L{ParseFatalException}} + if it is desired to stop parsing immediately.""" self.failAction = fn return self - def _skipIgnorables( self, instring, loc ): + def _skipIgnorables(self, instring, loc): exprsFound = True while exprsFound: exprsFound = False for e in self.ignoreExprs: try: while 1: - loc,dummy = e._parse( instring, loc ) + loc, dummy = e._parse(instring, loc) exprsFound = True except ParseException: pass return loc - def preParse( self, instring, loc ): + def preParse(self, instring, loc): if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) + loc = self._skipIgnorables(instring, loc) if self.skipWhitespace: wt = self.whiteChars @@ -1332,91 +1559,99 @@ def preParse( self, instring, loc ): return loc - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): return loc, [] - def postParse( self, instring, loc, tokenlist ): + def postParse(self, instring, loc, tokenlist): return tokenlist - #~ @profile - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): - debugging = ( self.debug ) #and doActions ) + # ~ @profile + def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True): + debugging = self.debug # and doActions ) if debugging or self.failAction: - #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if (self.debugActions[0] ): - self.debugActions[0]( instring, loc, self ) + # ~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) + if self.debugActions[0]: + self.debugActions[0](instring, loc, self) if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) + preloc = self.preParse(instring, loc) else: preloc = loc tokensStart = preloc try: try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) + loc, tokens = self.parseImpl(instring, preloc, doActions) except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) + raise ParseException(instring, len(instring), self.errmsg, self) except ParseBaseException as err: - #~ print ("Exception raised:", err) + # ~ print ("Exception raised:", err) if self.debugActions[2]: - self.debugActions[2]( instring, tokensStart, self, err ) + self.debugActions[2](instring, tokensStart, self, err) if self.failAction: - self.failAction( instring, tokensStart, self, err ) + self.failAction(instring, tokensStart, self, err) raise else: if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) + preloc = self.preParse(instring, loc) else: preloc = loc tokensStart = preloc if self.mayIndexError or loc >= len(instring): try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) + loc, tokens = self.parseImpl(instring, preloc, doActions) except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) + raise ParseException(instring, len(instring), self.errmsg, self) else: - loc,tokens = self.parseImpl( instring, preloc, doActions ) + loc, tokens = self.parseImpl(instring, preloc, doActions) - tokens = self.postParse( instring, loc, tokens ) + tokens = 
self.postParse(instring, loc, tokens) - retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) + retTokens = ParseResults( + tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults + ) if self.parseAction and (doActions or self.callDuringTry): if debugging: try: for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) + tokens = fn(instring, tokensStart, retTokens) if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) + retTokens = ParseResults( + tokens, + self.resultsName, + asList=self.saveAsList + and isinstance(tokens, (ParseResults, list)), + modal=self.modalResults, + ) except ParseBaseException as err: - #~ print "Exception raised in user parse action:", err - if (self.debugActions[2] ): - self.debugActions[2]( instring, tokensStart, self, err ) + # ~ print "Exception raised in user parse action:", err + if self.debugActions[2]: + self.debugActions[2](instring, tokensStart, self, err) raise else: for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) + tokens = fn(instring, tokensStart, retTokens) if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) + retTokens = ParseResults( + tokens, + self.resultsName, + asList=self.saveAsList + and isinstance(tokens, (ParseResults, list)), + modal=self.modalResults, + ) if debugging: - #~ print ("Matched",self,"->",retTokens.asList()) - if (self.debugActions[1] ): - self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) + # ~ print ("Matched",self,"->",retTokens.asList()) + if self.debugActions[1]: + self.debugActions[1](instring, tokensStart, loc, self, retTokens) return loc, retTokens - def tryParse( self, instring, loc ): + def tryParse(self, instring, loc): try: - return self._parse( instring, loc, doActions=False )[0] + return self._parse(instring, loc, doActions=False)[0] except ParseFatalException: - raise ParseException( instring, loc, self.errmsg, self) - + raise ParseException(instring, loc, self.errmsg, self) + def canParseNext(self, instring, loc): try: self.tryParse(instring, loc) @@ -1438,7 +1673,7 @@ def set(self, key, value): def clear(self): cache.clear() - + def cache_len(self): return len(cache) @@ -1448,6 +1683,7 @@ def cache_len(self): self.__len__ = types.MethodType(cache_len, self) if _OrderedDict is not None: + class _FifoCache(object): def __init__(self, size): self.not_in_cache = not_in_cache = object() @@ -1477,6 +1713,7 @@ def cache_len(self): self.__len__ = types.MethodType(cache_len, self) else: + class _FifoCache(object): def __init__(self, size): self.not_in_cache = not_in_cache = object() @@ -1506,13 +1743,15 @@ def cache_len(self): self.__len__ = types.MethodType(cache_len, self) # argument cache for optimizing repeated calls when backtracking through recursive expressions - packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail + packrat_cache = ( + {} + ) # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail packrat_cache_lock = RLock() packrat_cache_stats = [0, 0] # this method gets repeatedly called during backtracking with the same arguments - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache( 
self, instring, loc, doActions=True, callPreParse=True ): + def _parseCache(self, instring, loc, doActions=True, callPreParse=True): HIT, MISS = 0, 1 lookup = (self, instring, loc, callPreParse, doActions) with ParserElement.packrat_cache_lock: @@ -1540,35 +1779,38 @@ def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): @staticmethod def resetCache(): ParserElement.packrat_cache.clear() - ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats) + ParserElement.packrat_cache_stats[:] = [0] * len( + ParserElement.packrat_cache_stats + ) _packratEnabled = False + @staticmethod def enablePackrat(cache_size_limit=128): """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - Parameters: - - cache_size_limit - (default=C{128}) - if an integer value is provided - will limit the size of the packrat cache; if None is passed, then - the cache size will be unbounded; if 0 is passed, the cache will - be effectively disabled. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. To activate the packrat feature, your - program must call the class method C{ParserElement.enablePackrat()}. If - your program uses C{psyco} to "compile as you go", you must call - C{enablePackrat} before calling C{psyco.full()}. If you do not do this, - Python will crash. For best results, call C{enablePackrat()} immediately - after importing pyparsing. - - Example:: - import pyparsing - pyparsing.ParserElement.enablePackrat() + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + Parameters: + - cache_size_limit - (default=C{128}) - if an integer value is provided + will limit the size of the packrat cache; if None is passed, then + the cache size will be unbounded; if 0 is passed, the cache will + be effectively disabled. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method C{ParserElement.enablePackrat()}. If + your program uses C{psyco} to "compile as you go", you must call + C{enablePackrat} before calling C{psyco.full()}. If you do not do this, + Python will crash. For best results, call C{enablePackrat()} immediately + after importing pyparsing. + + Example:: + import pyparsing + pyparsing.ParserElement.enablePackrat() """ if not ParserElement._packratEnabled: ParserElement._packratEnabled = True @@ -1578,7 +1820,7 @@ def enablePackrat(cache_size_limit=128): ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) ParserElement._parse = ParserElement._parseCache - def parseString( self, instring, parseAll=False ): + def parseString(self, instring, parseAll=False): """ Execute the parse expression with the given string. 
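_parseCache is the memoizing replacement for _parse that enablePackrat() installs; the docstring's advice reduces to two lines:

import pyparsing

# call once, immediately after import and before building any grammar
pyparsing.ParserElement.enablePackrat()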
This is the main interface to the client code, once the complete @@ -1600,7 +1842,7 @@ def parseString( self, instring, parseAll=False ): reference the input string using the parse action's C{s} argument - explicitly expand the tabs in your input string before calling C{parseString} - + Example:: Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text @@ -1608,17 +1850,17 @@ def parseString( self, instring, parseAll=False ): ParserElement.resetCache() if not self.streamlined: self.streamline() - #~ self.saveAsList = True + # ~ self.saveAsList = True for e in self.ignoreExprs: e.streamline() if not self.keepTabs: instring = instring.expandtabs() try: - loc, tokens = self._parse( instring, 0 ) + loc, tokens = self._parse(instring, 0) if parseAll: - loc = self.preParse( instring, loc ) + loc = self.preParse(instring, loc) se = Empty() + StringEnd() - se._parse( instring, loc ) + se._parse(instring, loc) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -1628,7 +1870,7 @@ def parseString( self, instring, parseAll=False ): else: return tokens - def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): + def scanString(self, instring, maxMatches=_MAX_INT, overlap=False): """ Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location. May be called with optional @@ -1645,9 +1887,9 @@ def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): for tokens,start,end in Word(alphas).scanString(source): print(' '*start + '^'*(end-start)) print(' '*start + tokens[0]) - + prints:: - + sldjf123lsdjjkf345sldkjf879lkjsfd987 ^^^^^ sldjf @@ -1674,16 +1916,16 @@ def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): try: while loc <= instrlen and matches < maxMatches: try: - preloc = preparseFn( instring, loc ) - nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) + preloc = preparseFn(instring, loc) + nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) except ParseException: - loc = preloc+1 + loc = preloc + 1 else: if nextLoc > loc: matches += 1 yield tokens, preloc, nextLoc if overlap: - nextloc = preparseFn( instring, loc ) + nextloc = preparseFn(instring, loc) if nextloc > loc: loc = nextLoc else: @@ -1691,7 +1933,7 @@ def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): else: loc = nextLoc else: - loc = preloc+1 + loc = preloc + 1 except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -1699,7 +1941,7 @@ def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc - def transformString( self, instring ): + def transformString(self, instring): """ Extension to C{L{scanString}}, to modify matching text with modified tokens that may be returned from a parse action. To use C{transformString}, define a grammar and @@ -1707,11 +1949,11 @@ def transformString( self, instring ): Invoking C{transformString()} on a target string will then scan for matches, and replace the matched text patterns according to the logic in the parse action. C{transformString()} returns the resulting transformed string. 
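scanString() yields a (tokens, start, end) triple for every match found in the input, e.g.:

from pyparsing import Word, alphas

source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
for tokens, start, end in Word(alphas).scanString(source):
    print(tokens[0], start, end)  # each alphabetic run with its offsets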
- + Example:: wd = Word(alphas) wd.setParseAction(lambda toks: toks[0].title()) - + print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york.")) Prints:: Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. @@ -1722,19 +1964,19 @@ def transformString( self, instring ): # keep string locs straight between transformString and scanString self.keepTabs = True try: - for t,s,e in self.scanString( instring ): - out.append( instring[lastE:s] ) + for t, s, e in self.scanString(instring): + out.append(instring[lastE:s]) if t: - if isinstance(t,ParseResults): + if isinstance(t, ParseResults): out += t.asList() - elif isinstance(t,list): + elif isinstance(t, list): out += t else: out.append(t) lastE = e out.append(instring[lastE:]) out = [o for o in out if o] - return "".join(map(_ustr,_flatten(out))) + return "".join(map(_ustr, _flatten(out))) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -1742,16 +1984,16 @@ def transformString( self, instring ): # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc - def searchString( self, instring, maxMatches=_MAX_INT ): + def searchString(self, instring, maxMatches=_MAX_INT): """ Another extension to C{L{scanString}}, simplifying the access to the tokens found to match the given parse expression. May be called with optional C{maxMatches} argument, to clip searching after 'n' matches are found. - + Example:: # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters cap_word = Word(alphas.upper(), alphas.lower()) - + print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) # the sum() builtin can be used to merge results into a single ParseResults object @@ -1761,7 +2003,9 @@ def searchString( self, instring, maxMatches=_MAX_INT ): ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] """ try: - return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) + return ParseResults( + [t for t, s, e in self.scanString(instring, maxMatches)] + ) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -1775,8 +2019,8 @@ def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): May be called with optional C{maxsplit} argument, to limit the number of splits; and the optional C{includeSeparators} argument (default=C{False}), if the separating matching text should be included in the split results. - - Example:: + + Example:: punc = oneOf(list(".,;:/-!?")) print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) prints:: @@ -1784,18 +2028,18 @@ def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): """ splits = 0 last = 0 - for t,s,e in self.scanString(instring, maxMatches=maxsplit): + for t, s, e in self.scanString(instring, maxMatches=maxsplit): yield instring[last:s] if includeSeparators: yield t[0] last = e yield instring[last:] - def __add__(self, other ): + def __add__(self, other): """ Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement converts them to L{Literal}s by default. - + Example:: greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" @@ -1803,23 +2047,29 @@ def __add__(self, other ): Prints:: Hello, World! 
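transformString() builds on scanString() to rewrite the matched text through a parse action, mirroring the docstring example:

from pyparsing import Word, alphas

wd = Word(alphas)
wd.setParseAction(lambda toks: toks[0].title())
print(wd.transformString("now is the winter of our discontent"))
# -> 'Now Is The Winter Of Our Discontent'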
-> ['Hello', ',', 'World', '!'] """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None - return And( [ self, other ] ) + return And([self, other]) - def __radd__(self, other ): + def __radd__(self, other): """ Implementation of + operator when left operand is not a C{L{ParserElement}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None return other + self @@ -1827,27 +2077,33 @@ def __sub__(self, other): """ Implementation of - operator, returns C{L{And}} with error stop """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None return self + And._ErrorStop() + other - def __rsub__(self, other ): + def __rsub__(self, other): """ Implementation of - operator when left operand is not a C{L{ParserElement}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None return other - self - def __mul__(self,other): + def __mul__(self, other): """ Implementation of * operator, allows use of C{expr * 3} in place of C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer @@ -1867,162 +2123,190 @@ def __mul__(self,other): occurrences. 
If this behavior is desired, then write C{expr*(None,n) + ~expr} """ - if isinstance(other,int): - minElements, optElements = other,0 - elif isinstance(other,tuple): + if isinstance(other, int): + minElements, optElements = other, 0 + elif isinstance(other, tuple): other = (other + (None, None))[:2] if other[0] is None: other = (0, other[1]) - if isinstance(other[0],int) and other[1] is None: + if isinstance(other[0], int) and other[1] is None: if other[0] == 0: return ZeroOrMore(self) if other[0] == 1: return OneOrMore(self) else: - return self*other[0] + ZeroOrMore(self) - elif isinstance(other[0],int) and isinstance(other[1],int): + return self * other[0] + ZeroOrMore(self) + elif isinstance(other[0], int) and isinstance(other[1], int): minElements, optElements = other optElements -= minElements else: - raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) + raise TypeError( + "cannot multiply 'ParserElement' and ('%s','%s') objects", + type(other[0]), + type(other[1]), + ) else: - raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) + raise TypeError( + "cannot multiply 'ParserElement' and '%s' objects", type(other) + ) if minElements < 0: raise ValueError("cannot multiply ParserElement by negative value") if optElements < 0: - raise ValueError("second tuple value must be greater or equal to first tuple value") + raise ValueError( + "second tuple value must be greater or equal to first tuple value" + ) if minElements == optElements == 0: raise ValueError("cannot multiply ParserElement by 0 or (0,0)") - if (optElements): + if optElements: + def makeOptionalList(n): - if n>1: - return Optional(self + makeOptionalList(n-1)) + if n > 1: + return Optional(self + makeOptionalList(n - 1)) else: return Optional(self) + if minElements: if minElements == 1: ret = self + makeOptionalList(optElements) else: - ret = And([self]*minElements) + makeOptionalList(optElements) + ret = And([self] * minElements) + makeOptionalList(optElements) else: ret = makeOptionalList(optElements) else: if minElements == 1: ret = self else: - ret = And([self]*minElements) + ret = And([self] * minElements) return ret def __rmul__(self, other): return self.__mul__(other) - def __or__(self, other ): + def __or__(self, other): """ Implementation of | operator - returns C{L{MatchFirst}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None - return MatchFirst( [ self, other ] ) + return MatchFirst([self, other]) - def __ror__(self, other ): + def __ror__(self, other): """ Implementation of | operator when left operand is not a C{L{ParserElement}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with 
ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None return other | self - def __xor__(self, other ): + def __xor__(self, other): """ Implementation of ^ operator - returns C{L{Or}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None - return Or( [ self, other ] ) + return Or([self, other]) - def __rxor__(self, other ): + def __rxor__(self, other): """ Implementation of ^ operator when left operand is not a C{L{ParserElement}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None return other ^ self - def __and__(self, other ): + def __and__(self, other): """ Implementation of & operator - returns C{L{Each}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None - return Each( [ self, other ] ) + return Each([self, other]) - def __rand__(self, other ): + def __rand__(self, other): """ Implementation of & operator when left operand is not a C{L{ParserElement}} """ - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + if not isinstance(other, ParserElement): + warnings.warn( + "Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, + stacklevel=2, + ) return None return other & self - def __invert__( self ): + def __invert__(self): """ Implementation of ~ operator - returns C{L{NotAny}} """ - return NotAny( self ) + return NotAny(self) def __call__(self, name=None): """ Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}. - + If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be passed as C{True}. - + If C{name} is omitted, same as calling C{L{copy}}. 
Example:: # these are equivalent userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") """ if name is not None: return self.setResultsName(name) else: return self.copy() - def suppress( self ): + def suppress(self): """ Suppresses the output of this C{ParserElement}; useful to keep punctuation from cluttering up returned output. """ - return Suppress( self ) + return Suppress(self) - def leaveWhitespace( self ): + def leaveWhitespace(self): """ Disables the skipping of whitespace before matching the characters in the C{ParserElement}'s defined pattern. This is normally only used internally by @@ -2031,7 +2315,7 @@ def leaveWhitespace( self ): self.skipWhitespace = False return self - def setWhitespaceChars( self, chars ): + def setWhitespaceChars(self, chars): """ Overrides the default whitespace chars """ @@ -2040,7 +2324,7 @@ def setWhitespaceChars( self, chars ): self.copyDefaultWhiteChars = False return self - def parseWithTabs( self ): + def parseWithTabs(self): """ Overrides default behavior to expand C{}s to spaces before parsing the input string. Must be called before C{parseString} when the input grammar contains elements that @@ -2049,40 +2333,42 @@ def parseWithTabs( self ): self.keepTabs = True return self - def ignore( self, other ): + def ignore(self, other): """ Define expression to be ignored (e.g., comments) while doing pattern matching; may be called repeatedly, to define multiple comment or other ignorable patterns. - + Example:: patt = OneOrMore(Word(alphas)) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] - + patt.ignore(cStyleComment) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] """ if isinstance(other, basestring): other = Suppress(other) - if isinstance( other, Suppress ): + if isinstance(other, Suppress): if other not in self.ignoreExprs: self.ignoreExprs.append(other) else: - self.ignoreExprs.append( Suppress( other.copy() ) ) + self.ignoreExprs.append(Suppress(other.copy())) return self - def setDebugActions( self, startAction, successAction, exceptionAction ): + def setDebugActions(self, startAction, successAction, exceptionAction): """ Enable display of debugging messages while doing pattern matching. """ - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, - exceptionAction or _defaultExceptionDebugAction) + self.debugActions = ( + startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, + exceptionAction or _defaultExceptionDebugAction, + ) self.debug = True return self - def setDebug( self, flag=True ): + def setDebug(self, flag=True): """ Enable display of debugging messages while doing pattern matching. Set C{flag} to True to enable, False to disable. @@ -2091,12 +2377,12 @@ def setDebug( self, flag=True ): wd = Word(alphas).setName("alphaword") integer = Word(nums).setName("numword") term = wd | integer - + # turn on debugging for wd wd.setDebug() OneOrMore(term).parseString("abc 123 xyz 890") - + prints:: Match alphaword at loc 0(1,1) Matched alphaword -> ['abc'] @@ -2118,32 +2404,36 @@ def setDebug( self, flag=True ): name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}. 
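ignore() is the standard way to skip comments or other noise during matching; the docstring's own example, runnable:

from pyparsing import OneOrMore, Word, alphas, cStyleComment

patt = OneOrMore(Word(alphas))
patt.ignore(cStyleComment)
print(patt.parseString("ablaj /* comment */ lskjd"))  # -> ['ablaj', 'lskjd']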
""" if flag: - self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) + self.setDebugActions( + _defaultStartDebugAction, + _defaultSuccessDebugAction, + _defaultExceptionDebugAction, + ) else: self.debug = False return self - def __str__( self ): + def __str__(self): return self.name - def __repr__( self ): + def __repr__(self): return _ustr(self) - def streamline( self ): + def streamline(self): self.streamlined = True self.strRepr = None return self - def checkRecursion( self, parseElementList ): + def checkRecursion(self, parseElementList): pass - def validate( self, validateTrace=[] ): + def validate(self, validateTrace=[]): """ Check defined expressions for valid structure, check for infinite recursive definitions. """ - self.checkRecursion( [] ) + self.checkRecursion([]) - def parseFile( self, file_or_filename, parseAll=False ): + def parseFile(self, file_or_filename, parseAll=False): """ Execute the parse expression on the given file or filename. If a filename is specified (instead of a file object), @@ -2163,35 +2453,35 @@ def parseFile( self, file_or_filename, parseAll=False ): # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc - def __eq__(self,other): + def __eq__(self, other): if isinstance(other, ParserElement): return self is other or vars(self) == vars(other) elif isinstance(other, basestring): return self.matches(other) else: - return super(ParserElement,self)==other + return super(ParserElement, self) == other - def __ne__(self,other): + def __ne__(self, other): return not (self == other) def __hash__(self): return hash(id(self)) - def __req__(self,other): + def __req__(self, other): return self == other - def __rne__(self,other): + def __rne__(self, other): return not (self == other) def matches(self, testString, parseAll=True): """ - Method for quick testing of a parser against a test string. Good for simple + Method for quick testing of a parser against a test string. Good for simple inline microtests of sub expressions while building up larger parser. - + Parameters: - testString - to test against this expression for a match - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - + Example:: expr = Word(nums) assert expr.matches("100") @@ -2201,17 +2491,25 @@ def matches(self, testString, parseAll=True): return True except ParseBaseException: return False - - def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False): + + def runTests( + self, + tests, + parseAll=True, + comment="#", + fullDump=True, + printResults=True, + failureTests=False, + ): """ Execute the parse expression on a series of test strings, showing each test, the parsed results or where the parse failed. Quick and easy way to run a parse expression against a list of sample strings. 
- + Parameters: - tests - a list of separate test strings, or a multiline string of test strings - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - comment - (default=C{'#'}) - expression for indicating embedded comments in the test + - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests + - comment - (default=C{'#'}) - expression for indicating embedded comments in the test string; pass None to disable comment filtering - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline; if False, only dump nested list @@ -2219,9 +2517,9 @@ def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResult - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing Returns: a (success, results) tuple, where success indicates that all tests succeeded - (or failed if C{failureTests} is True), and the results contain a list of lines of each + (or failed if C{failureTests} is True), and the results contain a list of lines of each test's output - + Example:: number_expr = pyparsing_common.number.copy() @@ -2264,7 +2562,7 @@ def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResult [1e-12] Success - + # stray character 100Z ^ @@ -2286,7 +2584,7 @@ def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResult lines, create a test like this:: expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") - + (Note that this is a raw string literal, you must include the leading 'r'.) """ if isinstance(tests, basestring): @@ -2302,20 +2600,20 @@ def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResult continue if not t: continue - out = ['\n'.join(comments), t] + out = ["\n".join(comments), t] comments = [] try: - t = t.replace(r'\n','\n') + t = t.replace(r"\n", "\n") result = self.parseString(t, parseAll=parseAll) out.append(result.dump(full=fullDump)) success = success and not failureTests except ParseBaseException as pe: fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" - if '\n' in t: + if "\n" in t: out.append(line(pe.loc, t)) - out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) + out.append(" " * (col(pe.loc, t) - 1) + "^" + fatal) else: - out.append(' '*pe.loc + '^' + fatal) + out.append(" " * pe.loc + "^" + fatal) out.append("FAIL: " + str(pe)) success = success and failureTests result = pe @@ -2326,28 +2624,30 @@ def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResult if printResults: if fullDump: - out.append('') - print('\n'.join(out)) + out.append("") + print("\n".join(out)) allResults.append((t, result)) - + return success, allResults - + class Token(ParserElement): """ Abstract C{ParserElement} subclass, for defining atomic matching patterns. """ - def __init__( self ): - super(Token,self).__init__( savelist=False ) + + def __init__(self): + super(Token, self).__init__(savelist=False) class Empty(Token): """ An empty token, will always match. """ - def __init__( self ): - super(Empty,self).__init__() + + def __init__(self): + super(Empty, self).__init__() self.name = "Empty" self.mayReturnEmpty = True self.mayIndexError = False @@ -2357,40 +2657,45 @@ class NoMatch(Token): """ A token that will never match. 
""" - def __init__( self ): - super(NoMatch,self).__init__() + + def __init__(self): + super(NoMatch, self).__init__() self.name = "NoMatch" self.mayReturnEmpty = True self.mayIndexError = False self.errmsg = "Unmatchable token" - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): raise ParseException(instring, loc, self.errmsg, self) class Literal(Token): """ Token to exactly match a specified string. - + Example:: Literal('blah').parseString('blah') # -> ['blah'] Literal('blah').parseString('blahfooblah') # -> ['blah'] Literal('blah').parseString('bla') # -> Exception: Expected "blah" - + For case-insensitive matching, use L{CaselessLiteral}. - + For keyword matching (force word break before and after the matched string), use L{Keyword} or L{CaselessKeyword}. """ - def __init__( self, matchString ): - super(Literal,self).__init__() + + def __init__(self, matchString): + super(Literal, self).__init__() self.match = matchString self.matchLen = len(matchString) try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", - SyntaxWarning, stacklevel=2) + warnings.warn( + "null string passed to Literal; use Empty() instead", + SyntaxWarning, + stacklevel=2, + ) self.__class__ = Empty self.name = '"%s"' % _ustr(self.match) self.errmsg = "Expected " + self.name @@ -2400,15 +2705,19 @@ def __init__( self, matchString ): # Performance tuning: this routine gets called a *lot* # if this is a single character match string and the first character matches, # short-circuit as quickly as possible, and avoid calling startswith - #~ @profile - def parseImpl( self, instring, loc, doActions=True ): - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) ): - return loc+self.matchLen, self.match + # ~ @profile + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] == self.firstMatchChar and ( + self.matchLen == 1 or instring.startswith(self.match, loc) + ): + return loc + self.matchLen, self.match raise ParseException(instring, loc, self.errmsg, self) + + _L = Literal ParserElement._literalStringClass = Literal + class Keyword(Token): """ Token to exactly match a specified string as a keyword, that is, it must be @@ -2419,17 +2728,18 @@ class Keyword(Token): - C{identChars} is a string of characters that would be valid identifier characters, defaulting to all alphanumerics + "_" and "$" - C{caseless} allows case-insensitive matching, default is C{False}. - + Example:: Keyword("start").parseString("start") # -> ['start'] Keyword("start").parseString("starting") # -> Exception For case-insensitive matching, use L{CaselessKeyword}. 
""" - DEFAULT_KEYWORD_CHARS = alphanums+"_$" - def __init__( self, matchString, identChars=None, caseless=False ): - super(Keyword,self).__init__() + DEFAULT_KEYWORD_CHARS = alphanums + "_$" + + def __init__(self, matchString, identChars=None, caseless=False): + super(Keyword, self).__init__() if identChars is None: identChars = Keyword.DEFAULT_KEYWORD_CHARS self.match = matchString @@ -2437,8 +2747,11 @@ def __init__( self, matchString, identChars=None, caseless=False ): try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", - SyntaxWarning, stacklevel=2) + warnings.warn( + "null string passed to Keyword; use Empty() instead", + SyntaxWarning, + stacklevel=2, + ) self.name = '"%s"' % self.match self.errmsg = "Expected " + self.name self.mayReturnEmpty = False @@ -2449,31 +2762,41 @@ def __init__( self, matchString, identChars=None, caseless=False ): identChars = identChars.upper() self.identChars = set(identChars) - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if self.caseless: - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and - (loc == 0 or instring[loc-1].upper() not in self.identChars) ): - return loc+self.matchLen, self.match + if ( + (instring[loc : loc + self.matchLen].upper() == self.caselessmatch) + and ( + loc >= len(instring) - self.matchLen + or instring[loc + self.matchLen].upper() not in self.identChars + ) + and (loc == 0 or instring[loc - 1].upper() not in self.identChars) + ): + return loc + self.matchLen, self.match else: - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and - (loc == 0 or instring[loc-1] not in self.identChars) ): - return loc+self.matchLen, self.match + if ( + instring[loc] == self.firstMatchChar + and (self.matchLen == 1 or instring.startswith(self.match, loc)) + and ( + loc >= len(instring) - self.matchLen + or instring[loc + self.matchLen] not in self.identChars + ) + and (loc == 0 or instring[loc - 1] not in self.identChars) + ): + return loc + self.matchLen, self.match raise ParseException(instring, loc, self.errmsg, self) def copy(self): - c = super(Keyword,self).copy() + c = super(Keyword, self).copy() c.identChars = Keyword.DEFAULT_KEYWORD_CHARS return c @staticmethod - def setDefaultKeywordChars( chars ): - """Overrides the default Keyword chars - """ + def setDefaultKeywordChars(chars): + """Overrides the default Keyword chars""" Keyword.DEFAULT_KEYWORD_CHARS = chars + class CaselessLiteral(Literal): """ Token to match a specified string, ignoring case of letters. @@ -2482,52 +2805,58 @@ class CaselessLiteral(Literal): Example:: OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] - + (Contrast with example for L{CaselessKeyword}.) """ - def __init__( self, matchString ): - super(CaselessLiteral,self).__init__( matchString.upper() ) + + def __init__(self, matchString): + super(CaselessLiteral, self).__init__(matchString.upper()) # Preserve the defining literal. 
self.returnString = matchString self.name = "'%s'" % self.returnString self.errmsg = "Expected " + self.name - def parseImpl( self, instring, loc, doActions=True ): - if instring[ loc:loc+self.matchLen ].upper() == self.match: - return loc+self.matchLen, self.returnString + def parseImpl(self, instring, loc, doActions=True): + if instring[loc : loc + self.matchLen].upper() == self.match: + return loc + self.matchLen, self.returnString raise ParseException(instring, loc, self.errmsg, self) + class CaselessKeyword(Keyword): """ Caseless version of L{Keyword}. Example:: OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] - + (Contrast with example for L{CaselessLiteral}.) """ - def __init__( self, matchString, identChars=None ): - super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) - def parseImpl( self, instring, loc, doActions=True ): - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): - return loc+self.matchLen, self.match + def __init__(self, matchString, identChars=None): + super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True) + + def parseImpl(self, instring, loc, doActions=True): + if (instring[loc : loc + self.matchLen].upper() == self.caselessmatch) and ( + loc >= len(instring) - self.matchLen + or instring[loc + self.matchLen].upper() not in self.identChars + ): + return loc + self.matchLen, self.match raise ParseException(instring, loc, self.errmsg, self) + class CloseMatch(Token): """ - A variation on L{Literal} which matches "close" matches, that is, + A variation on L{Literal} which matches "close" matches, that is, strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters: - C{match_string} - string to be matched - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match - + The results from a successful parse will contain the matched text from the input string and the following named results: - C{mismatches} - a list of the positions within the match_string where mismatches were found - C{original} - the original match_string used to compare against the input string - + If C{mismatches} is an empty list, then the match was an exact match. 
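The caseless contrast both docstrings above point at, in one sketch (standalone pyparsing 2.x assumed)::

    from pyparsing import CaselessKeyword, CaselessLiteral, OneOrMore

    print(OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10"))
    # -> ['CMD', 'CMD', 'CMD']   (also matches the leading 'Cmd' of 'Cmd10')
    print(OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10"))
    # -> ['CMD', 'CMD']          (the keyword form refuses 'Cmd10')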
- + Example:: patt = CloseMatch("ATCATCGAATGGA") patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) @@ -2540,16 +2869,20 @@ class CloseMatch(Token): patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2) patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) """ + def __init__(self, match_string, maxMismatches=1): - super(CloseMatch,self).__init__() + super(CloseMatch, self).__init__() self.name = match_string self.match_string = match_string self.maxMismatches = maxMismatches - self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches) + self.errmsg = "Expected %r (with up to %d mismatches)" % ( + self.match_string, + self.maxMismatches, + ) self.mayIndexError = False self.mayReturnEmpty = False - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): start = loc instrlen = len(instring) maxloc = start + len(self.match_string) @@ -2560,8 +2893,10 @@ def parseImpl( self, instring, loc, doActions=True ): mismatches = [] maxMismatches = self.maxMismatches - for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)): - src,mat = s_m + for match_stringloc, s_m in enumerate( + zip(instring[loc:maxloc], self.match_string) + ): + src, mat = s_m if src != mat: mismatches.append(match_stringloc) if len(mismatches) > maxMismatches: @@ -2569,8 +2904,8 @@ def parseImpl( self, instring, loc, doActions=True ): else: loc = match_stringloc + 1 results = ParseResults([instring[start:loc]]) - results['original'] = self.match_string - results['mismatches'] = mismatches + results["original"] = self.match_string + results["mismatches"] = mismatches return loc, results raise ParseException(instring, loc, self.errmsg, self) @@ -2585,14 +2920,14 @@ class Word(Token): maximum, and/or exact length. The default value for C{min} is 1 (a minimum value < 1 is not valid); the default values for C{max} and C{exact} are 0, meaning no maximum or exact length restriction. An optional - C{excludeChars} parameter can list characters that might be found in + C{excludeChars} parameter can list characters that might be found in the input C{bodyChars} string; useful to define a word of all printables except for one or two characters, for instance. - - L{srange} is useful for defining custom character set strings for defining + + L{srange} is useful for defining custom character set strings for defining C{Word} expressions, using range notation from regular expression character sets. - - A common mistake is to use C{Word} to match a specific literal string, as in + + A common mistake is to use C{Word} to match a specific literal string, as in C{Word("Address")}. Remember that C{Word} uses the string argument to define I{sets} of matchable characters. This expression would match "Add", "AAA", "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. 
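A runnable illustration of the Word pitfall just described: the argument defines a character *set*, not a literal string (standalone pyparsing 2.x assumed)::

    from pyparsing import Word, nums

    print(Word("Address").parseString("dAred"))  # -> ['dAred']: any mix of A, d, r, e, s
    print(Word(nums).parseString("1234"))        # -> ['1234']: one or more digits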
@@ -2610,28 +2945,38 @@ class Word(Token): Example:: # a word composed of digits integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) - + # a word with a leading capital, and zero or more lowercase capital_word = Word(alphas.upper(), alphas.lower()) # hostnames are alphanumeric, with leading alpha, and '-' hostname = Word(alphas, alphanums+'-') - + # roman numeral (not a strict parser, accepts invalid mix of characters) roman = Word("IVXLCDM") - + # any string of non-whitespace characters, except for ',' csv_value = Word(printables, excludeChars=",") """ - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): - super(Word,self).__init__() + + def __init__( + self, + initChars, + bodyChars=None, + min=1, + max=0, + exact=0, + asKeyword=False, + excludeChars=None, + ): + super(Word, self).__init__() if excludeChars: - initChars = ''.join(c for c in initChars if c not in excludeChars) + initChars = "".join(c for c in initChars if c not in excludeChars) if bodyChars: - bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) + bodyChars = "".join(c for c in bodyChars if c not in excludeChars) self.initCharsOrig = initChars self.initChars = set(initChars) - if bodyChars : + if bodyChars: self.bodyCharsOrig = bodyChars self.bodyChars = set(bodyChars) else: @@ -2641,7 +2986,10 @@ def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword= self.maxSpecified = max > 0 if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") + raise ValueError( + "cannot specify a minimum length < 1; use Optional(Word()) if" + " zero-length word is permitted" + ) self.minLen = min @@ -2659,34 +3007,38 @@ def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword= self.mayIndexError = False self.asKeyword = asKeyword - if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): + if " " not in self.initCharsOrig + self.bodyCharsOrig and ( + min == 1 and max == 0 and exact == 0 + ): if self.bodyCharsOrig == self.initCharsOrig: self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) elif len(self.initCharsOrig) == 1: - self.reString = "%s[%s]*" % \ - (re.escape(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) + self.reString = "%s[%s]*" % ( + re.escape(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig), + ) else: - self.reString = "[%s][%s]*" % \ - (_escapeRegexRangeChars(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) + self.reString = "[%s][%s]*" % ( + _escapeRegexRangeChars(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig), + ) if self.asKeyword: - self.reString = r"\b"+self.reString+r"\b" + self.reString = r"\b" + self.reString + r"\b" try: - self.re = re.compile( self.reString ) + self.re = re.compile(self.reString) except Exception: self.re = None - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if self.re: - result = self.re.match(instring,loc) + result = self.re.match(instring, loc) if not result: raise ParseException(instring, loc, self.errmsg, self) loc = result.end() return loc, result.group() - if not(instring[ loc ] in self.initChars): + if not (instring[loc] in self.initChars): raise ParseException(instring, loc, self.errmsg, self) start = loc @@ -2694,7 +3046,7 @@ def parseImpl( self, instring, loc, doActions=True ): instrlen = 
len(instring) bodychars = self.bodyChars maxloc = start + self.maxLen - maxloc = min( maxloc, instrlen ) + maxloc = min(maxloc, instrlen) while loc < maxloc and instring[loc] in bodychars: loc += 1 @@ -2704,7 +3056,9 @@ def parseImpl( self, instring, loc, doActions=True ): if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: throwException = True if self.asKeyword: - if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): + if (start > 0 and instring[start - 1] in bodychars) or ( + loc < instrlen and instring[loc] in bodychars + ): throwException = True if throwException: @@ -2712,23 +3066,25 @@ def parseImpl( self, instring, loc, doActions=True ): return loc, instring[start:loc] - def __str__( self ): + def __str__(self): try: - return super(Word,self).__str__() + return super(Word, self).__str__() except Exception: pass - if self.strRepr is None: def charsAsStr(s): - if len(s)>4: - return s[:4]+"..." + if len(s) > 4: + return s[:4] + "..." else: return s - if ( self.initCharsOrig != self.bodyCharsOrig ): - self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) + if self.initCharsOrig != self.bodyCharsOrig: + self.strRepr = "W:(%s,%s)" % ( + charsAsStr(self.initCharsOrig), + charsAsStr(self.bodyCharsOrig), + ) else: self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) @@ -2739,7 +3095,7 @@ class Regex(Token): r""" Token for matching strings that match a given regular expression. Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. - If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as + If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as named parse results. Example:: realnum = Regex(r"[+-]?\d+\.\d*") date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)') # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") """ compiledREtype = type(re.compile("[A-Z]")) - def __init__( self, pattern, flags=0): + + def __init__(self, pattern, flags=0): """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is.
See the Python C{re} module for an explanation of the acceptable patterns and flags.""" - super(Regex,self).__init__() + super(Regex, self).__init__() if isinstance(pattern, basestring): if not pattern: - warnings.warn("null string passed to Regex; use Empty() instead", - SyntaxWarning, stacklevel=2) + warnings.warn( + "null string passed to Regex; use Empty() instead", + SyntaxWarning, + stacklevel=2, + ) self.pattern = pattern self.flags = flags @@ -2765,26 +3125,30 @@ def __init__( self, pattern, flags=0): self.re = re.compile(self.pattern, self.flags) self.reString = self.pattern except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % pattern, - SyntaxWarning, stacklevel=2) + warnings.warn( + "invalid pattern (%s) passed to Regex" % pattern, + SyntaxWarning, + stacklevel=2, + ) raise elif isinstance(pattern, Regex.compiledREtype): self.re = pattern - self.pattern = \ - self.reString = str(pattern) + self.pattern = self.reString = str(pattern) self.flags = flags - + else: - raise ValueError("Regex may only be constructed with a string or a compiled RE object") + raise ValueError( + "Regex may only be constructed with a string or a compiled RE object" + ) self.name = _ustr(self) self.errmsg = "Expected " + self.name self.mayIndexError = False self.mayReturnEmpty = True - def parseImpl( self, instring, loc, doActions=True ): - result = self.re.match(instring,loc) + def parseImpl(self, instring, loc, doActions=True): + result = self.re.match(instring, loc) if not result: raise ParseException(instring, loc, self.errmsg, self) @@ -2794,11 +3158,11 @@ def parseImpl( self, instring, loc, doActions=True ): if d: for k in d: ret[k] = d[k] - return loc,ret + return loc, ret - def __str__( self ): + def __str__(self): try: - return super(Regex,self).__str__() + return super(Regex, self).__str__() except Exception: pass @@ -2811,7 +3175,7 @@ def __str__( self ): class QuotedString(Token): r""" Token for matching strings that are delimited by quoting characters. 
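How Regex's named groups surface as named parse results, per the docstring above; the group names here are purely illustrative (standalone pyparsing 2.x assumed)::

    from pyparsing import Regex

    date = Regex(r"(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)")
    result = date.parseString("2022-05-13")
    print(result["year"], result["month"], result["day"])  # -> 2022 05 13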
- + Defined with the following parameters: - quoteChar - string of one or more characters defining the quote delimiting string - escChar - character to escape quotes, typically backslash (default=C{None}) @@ -2833,13 +3197,25 @@ class QuotedString(Token): [['This is the "quote"']] [['This is the quote with "embedded" quotes']] """ - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): - super(QuotedString,self).__init__() + + def __init__( + self, + quoteChar, + escChar=None, + escQuote=None, + multiline=False, + unquoteResults=True, + endQuoteChar=None, + convertWhitespaceEscapes=True, + ): + super(QuotedString, self).__init__() # remove white space from quote chars - won't work anyway quoteChar = quoteChar.strip() if not quoteChar: - warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + warnings.warn( + "quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2 + ) raise SyntaxError() if endQuoteChar is None: @@ -2847,7 +3223,11 @@ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unq else: endQuoteChar = endQuoteChar.strip() if not endQuoteChar: - warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + warnings.warn( + "endQuoteChar cannot be the empty string", + SyntaxWarning, + stacklevel=2, + ) raise SyntaxError() self.quoteChar = quoteChar @@ -2862,35 +3242,47 @@ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unq if multiline: self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'%s(?:[^%s%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + self.pattern = r"%s(?:[^%s%s]" % ( + re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or ""), + ) else: self.flags = 0 - self.pattern = r'%s(?:[^%s\n\r%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + self.pattern = r"%s(?:[^%s\n\r%s]" % ( + re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or ""), + ) if len(self.endQuoteChar) > 1: self.pattern += ( - '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) - for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' + "|(?:" + + ")|(?:".join( + "%s[^%s]" + % ( + re.escape(self.endQuoteChar[:i]), + _escapeRegexRangeChars(self.endQuoteChar[i]), + ) + for i in range(len(self.endQuoteChar) - 1, 0, -1) ) + + ")" + ) if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) + self.pattern += r"|(?:%s)" % re.escape(escQuote) if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar)+"(.)" - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) + self.pattern += r"|(?:%s.)" % re.escape(escChar) + self.escCharReplacePattern = re.escape(self.escChar) + "(.)" + self.pattern += r")*%s" % re.escape(self.endQuoteChar) try: self.re = re.compile(self.pattern, self.flags) self.reString = self.pattern except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, stacklevel=2) + warnings.warn( + "invalid pattern (%s) passed to Regex" % 
self.pattern, + SyntaxWarning, + stacklevel=2, + ) raise self.name = _ustr(self) @@ -2898,8 +3290,12 @@ def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unq self.mayIndexError = False self.mayReturnEmpty = True - def parseImpl( self, instring, loc, doActions=True ): - result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None + def parseImpl(self, instring, loc, doActions=True): + result = ( + instring[loc] == self.firstQuoteChar + and self.re.match(instring, loc) + or None + ) if not result: raise ParseException(instring, loc, self.errmsg, self) @@ -2909,18 +3305,18 @@ def parseImpl( self, instring, loc, doActions=True ): if self.unquoteResults: # strip off quotes - ret = ret[self.quoteCharLen:-self.endQuoteCharLen] + ret = ret[self.quoteCharLen : -self.endQuoteCharLen] - if isinstance(ret,basestring): + if isinstance(ret, basestring): # replace escaped whitespace - if '\\' in ret and self.convertWhitespaceEscapes: + if "\\" in ret and self.convertWhitespaceEscapes: ws_map = { - r'\t' : '\t', - r'\n' : '\n', - r'\f' : '\f', - r'\r' : '\r', + r"\t": "\t", + r"\n": "\n", + r"\f": "\f", + r"\r": "\r", } - for wslit,wschar in ws_map.items(): + for wslit, wschar in ws_map.items(): ret = ret.replace(wslit, wschar) # replace escaped characters @@ -2933,14 +3329,17 @@ def parseImpl( self, instring, loc, doActions=True ): return loc, ret - def __str__( self ): + def __str__(self): try: - return super(QuotedString,self).__str__() + return super(QuotedString, self).__str__() except Exception: pass if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) + self.strRepr = "quoted string, starting with %s ending with %s" % ( + self.quoteChar, + self.endQuoteChar, + ) return self.strRepr @@ -2961,13 +3360,17 @@ class CharsNotIn(Token): prints:: ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] """ - def __init__( self, notChars, min=1, max=0, exact=0 ): - super(CharsNotIn,self).__init__() + + def __init__(self, notChars, min=1, max=0, exact=0): + super(CharsNotIn, self).__init__() self.skipWhitespace = False self.notChars = notChars if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") + raise ValueError( + "cannot specify a minimum length < 1; use Optional(CharsNotIn()) if" + " zero-length char group is permitted" + ) self.minLen = min @@ -2982,19 +3385,18 @@ def __init__( self, notChars, min=1, max=0, exact=0 ): self.name = _ustr(self) self.errmsg = "Expected " + self.name - self.mayReturnEmpty = ( self.minLen == 0 ) + self.mayReturnEmpty = self.minLen == 0 self.mayIndexError = False - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if instring[loc] in self.notChars: raise ParseException(instring, loc, self.errmsg, self) start = loc loc += 1 notchars = self.notChars - maxlen = min( start+self.maxLen, len(instring) ) - while loc < maxlen and \ - (instring[loc] not in notchars): + maxlen = min(start + self.maxLen, len(instring)) + while loc < maxlen and (instring[loc] not in notchars): loc += 1 if loc - start < self.minLen: @@ -3002,7 +3404,7 @@ def parseImpl( self, instring, loc, doActions=True ): return loc, instring[start:loc] - def __str__( self ): + def __str__(self): try: return super(CharsNotIn, self).__str__() except Exception: @@ -3016,6 +3418,7 @@ def __str__( self ): return self.strRepr + class White(Token): """ Special matching class 
for matching whitespace. Normally, whitespace is ignored @@ -3024,19 +3427,23 @@ class White(Token): matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, as defined for the C{L{Word}} class. """ + whiteStrs = { - " " : "<SPC>", "\t": "<TAB>", "\n": "<LF>", "\r": "<CR>", "\f": "<FF>", - } + " ": "<SPC>", + "\t": "<TAB>", + "\n": "<LF>", + "\r": "<CR>", + "\f": "<FF>", + } + def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): - super(White,self).__init__() + super(White, self).__init__() self.matchWhite = ws - self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) ) - #~ self.leaveWhitespace() - self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) + self.setWhitespaceChars( + "".join(c for c in self.whiteChars if c not in self.matchWhite) + ) + # ~ self.leaveWhitespace() + self.name = "".join(White.whiteStrs[c] for c in self.matchWhite) self.mayReturnEmpty = True self.errmsg = "Expected " + self.name @@ -3051,13 +3458,13 @@ def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): self.maxLen = exact self.minLen = exact - def parseImpl( self, instring, loc, doActions=True ): - if not(instring[ loc ] in self.matchWhite): + def parseImpl(self, instring, loc, doActions=True): + if not (instring[loc] in self.matchWhite): raise ParseException(instring, loc, self.errmsg, self) start = loc loc += 1 maxloc = start + self.maxLen - maxloc = min( maxloc, len(instring) ) + maxloc = min(maxloc, len(instring)) while loc < maxloc and instring[loc] in self.matchWhite: loc += 1 @@ -3068,44 +3475,50 @@ def parseImpl( self, instring, loc, doActions=True ): class _PositionToken(Token): - def __init__( self ): - super(_PositionToken,self).__init__() - self.name=self.__class__.__name__ + def __init__(self): + super(_PositionToken, self).__init__() + self.name = self.__class__.__name__ self.mayReturnEmpty = True self.mayIndexError = False + class GoToColumn(_PositionToken): """ Token to advance to a specific column of input text; useful for tabular report scraping.
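A small probe of the White class above, which matches whitespace instead of skipping it (standalone pyparsing 2.x assumed)::

    from pyparsing import White, Word, alphas

    tab_indented = White("\t") + Word(alphas)
    print(tab_indented.parseString("\tword"))  # -> ['\t', 'word']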
""" - def __init__( self, colno ): - super(GoToColumn,self).__init__() + + def __init__(self, colno): + super(GoToColumn, self).__init__() self.col = colno - def preParse( self, instring, loc ): - if col(loc,instring) != self.col: + def preParse(self, instring, loc): + if col(loc, instring) != self.col: instrlen = len(instring) if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : + loc = self._skipIgnorables(instring, loc) + while ( + loc < instrlen + and instring[loc].isspace() + and col(loc, instring) != self.col + ): loc += 1 return loc - def parseImpl( self, instring, loc, doActions=True ): - thiscol = col( loc, instring ) + def parseImpl(self, instring, loc, doActions=True): + thiscol = col(loc, instring) if thiscol > self.col: - raise ParseException( instring, loc, "Text not in expected column", self ) + raise ParseException(instring, loc, "Text not in expected column", self) newloc = loc + self.col - thiscol - ret = instring[ loc: newloc ] + ret = instring[loc:newloc] return newloc, ret class LineStart(_PositionToken): """ Matches if current position is at the beginning of a line within the parse string - + Example:: - + test = '''\ AAA this line AAA and this line @@ -3115,74 +3528,82 @@ class LineStart(_PositionToken): for t in (LineStart() + 'AAA' + restOfLine).searchString(test): print(t) - + Prints:: ['AAA', ' this line'] - ['AAA', ' and this line'] + ['AAA', ' and this line'] """ - def __init__( self ): - super(LineStart,self).__init__() + + def __init__(self): + super(LineStart, self).__init__() self.errmsg = "Expected start of line" - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if col(loc, instring) == 1: return loc, [] raise ParseException(instring, loc, self.errmsg, self) + class LineEnd(_PositionToken): """ Matches if current position is at the end of a line within the parse string """ - def __init__( self ): - super(LineEnd,self).__init__() - self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + + def __init__(self): + super(LineEnd, self).__init__() + self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")) self.errmsg = "Expected end of line" - def parseImpl( self, instring, loc, doActions=True ): - if loc len(instring): return loc, [] else: raise ParseException(instring, loc, self.errmsg, self) + class WordStart(_PositionToken): """ Matches if the current position is at the beginning of a Word, and @@ -3191,18 +3612,22 @@ class WordStart(_PositionToken): use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of the string being parsed, or at the beginning of a line. """ - def __init__(self, wordChars = printables): - super(WordStart,self).__init__() + + def __init__(self, wordChars=printables): + super(WordStart, self).__init__() self.wordChars = set(wordChars) self.errmsg = "Not at the start of a word" - def parseImpl(self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if loc != 0: - if (instring[loc-1] in self.wordChars or - instring[loc] not in self.wordChars): + if ( + instring[loc - 1] in self.wordChars + or instring[loc] not in self.wordChars + ): raise ParseException(instring, loc, self.errmsg, self) return loc, [] + class WordEnd(_PositionToken): """ Matches if the current position is at the end of a Word, and @@ -3211,17 +3636,20 @@ class WordEnd(_PositionToken): use C{WordEnd(alphanums)}. 
C{WordEnd} will also match at the end of the string being parsed, or at the end of a line. """ - def __init__(self, wordChars = printables): - super(WordEnd,self).__init__() + + def __init__(self, wordChars=printables): + super(WordEnd, self).__init__() self.wordChars = set(wordChars) self.skipWhitespace = False self.errmsg = "Not at the end of a word" - def parseImpl(self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): instrlen = len(instring) - if instrlen>0 and loc<instrlen: - if (instring[loc] in self.wordChars or - instring[loc-1] not in self.wordChars): + if instrlen > 0 and loc < instrlen: + if ( + instring[loc] in self.wordChars + or instring[loc - 1] not in self.wordChars + ): raise ParseException(instring, loc, self.errmsg, self) return loc, [] @@ -3230,14 +3658,15 @@ class ParseExpression(ParserElement): """ Abstract subclass of ParserElement, for combining and post-processing parsed tokens. """ - def __init__( self, exprs, savelist = False ): - super(ParseExpression,self).__init__(savelist) - if isinstance( exprs, _generatorType ): + + def __init__(self, exprs, savelist=False): + super(ParseExpression, self).__init__(savelist) + if isinstance(exprs, _generatorType): exprs = list(exprs) - if isinstance( exprs, basestring ): - self.exprs = [ ParserElement._literalStringClass( exprs ) ] - elif isinstance( exprs, collections.Iterable ): + if isinstance(exprs, basestring): + self.exprs = [ParserElement._literalStringClass(exprs)] + elif isinstance(exprs, collections.Iterable): exprs = list(exprs) # if sequence of strings provided, wrap with Literal if all(isinstance(expr, basestring) for expr in exprs): @@ -3245,52 +3674,52 @@ def __init__( self, exprs, savelist = False ): self.exprs = list(exprs) else: try: - self.exprs = list( exprs ) + self.exprs = list(exprs) except TypeError: - self.exprs = [ exprs ] + self.exprs = [exprs] self.callPreparse = False - def __getitem__( self, i ): + def __getitem__(self, i): return self.exprs[i] - def append( self, other ): - self.exprs.append( other ) + def append(self, other): + self.exprs.append(other) self.strRepr = None return self - def leaveWhitespace( self ): + def leaveWhitespace(self): """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on - all contained expressions.""" + all contained expressions.""" self.skipWhitespace = False - self.exprs = [ e.copy() for e in self.exprs ] + self.exprs = [e.copy() for e in self.exprs] for e in self.exprs: e.leaveWhitespace() return self - def ignore( self, other ): - if isinstance( other, Suppress ): + def ignore(self, other): + if isinstance(other, Suppress): if other not in self.ignoreExprs: - super( ParseExpression, self).ignore( other ) + super(ParseExpression, self).ignore(other) for e in self.exprs: - e.ignore( self.ignoreExprs[-1] ) + e.ignore(self.ignoreExprs[-1]) else: - super( ParseExpression, self).ignore( other ) + super(ParseExpression, self).ignore(other) for e in self.exprs: - e.ignore( self.ignoreExprs[-1] ) + e.ignore(self.ignoreExprs[-1]) return self - def __str__( self ): + def __str__(self): try: - return super(ParseExpression,self).__str__() + return super(ParseExpression, self).__str__() except Exception: pass if self.strRepr is None: - self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) + self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs)) return self.strRepr - def streamline( self ): - super(ParseExpression,self).streamline() + def streamline(self): + super(ParseExpression, self).streamline() for e in self.exprs: e.streamline() @@ -3298,46 +3727,51 @@ def streamline(
self ): # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d ) # but only if there are no parse actions or resultsNames on the nested And's # (likewise for Or's and MatchFirst's) - if ( len(self.exprs) == 2 ): + if len(self.exprs) == 2: other = self.exprs[0] - if ( isinstance( other, self.__class__ ) and - not(other.parseAction) and - other.resultsName is None and - not other.debug ): - self.exprs = other.exprs[:] + [ self.exprs[1] ] + if ( + isinstance(other, self.__class__) + and not (other.parseAction) + and other.resultsName is None + and not other.debug + ): + self.exprs = other.exprs[:] + [self.exprs[1]] self.strRepr = None self.mayReturnEmpty |= other.mayReturnEmpty - self.mayIndexError |= other.mayIndexError + self.mayIndexError |= other.mayIndexError other = self.exprs[-1] - if ( isinstance( other, self.__class__ ) and - not(other.parseAction) and - other.resultsName is None and - not other.debug ): + if ( + isinstance(other, self.__class__) + and not (other.parseAction) + and other.resultsName is None + and not other.debug + ): self.exprs = self.exprs[:-1] + other.exprs[:] self.strRepr = None self.mayReturnEmpty |= other.mayReturnEmpty - self.mayIndexError |= other.mayIndexError + self.mayIndexError |= other.mayIndexError self.errmsg = "Expected " + _ustr(self) - + return self - def setResultsName( self, name, listAllMatches=False ): - ret = super(ParseExpression,self).setResultsName(name,listAllMatches) + def setResultsName(self, name, listAllMatches=False): + ret = super(ParseExpression, self).setResultsName(name, listAllMatches) return ret - def validate( self, validateTrace=[] ): - tmp = validateTrace[:]+[self] + def validate(self, validateTrace=[]): + tmp = validateTrace[:] + [self] for e in self.exprs: e.validate(tmp) - self.checkRecursion( [] ) - + self.checkRecursion([]) + def copy(self): - ret = super(ParseExpression,self).copy() + ret = super(ParseExpression, self).copy() ret.exprs = [e.copy() for e in self.exprs] return ret + class And(ParseExpression): """ Requires all given C{ParseExpression}s to be found in the given order. 
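And is what the '+' operator builds; every sub-expression must match in sequence (standalone pyparsing 2.x assumed)::

    from pyparsing import Word, alphas, nums

    name_number = Word(alphas) + Word(nums)
    print(name_number.parseString("Bob 42"))  # -> ['Bob', '42']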
@@ -3356,21 +3790,23 @@ class And(ParseExpression): class _ErrorStop(Empty): def __init__(self, *args, **kwargs): - super(And._ErrorStop,self).__init__(*args, **kwargs) - self.name = '-' + super(And._ErrorStop, self).__init__(*args, **kwargs) + self.name = "-" self.leaveWhitespace() - def __init__( self, exprs, savelist = True ): - super(And,self).__init__(exprs, savelist) + def __init__(self, exprs, savelist=True): + super(And, self).__init__(exprs, savelist) self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - self.setWhitespaceChars( self.exprs[0].whiteChars ) + self.setWhitespaceChars(self.exprs[0].whiteChars) self.skipWhitespace = self.exprs[0].skipWhitespace self.callPreparse = True - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): # pass False as last arg to _parse for first element, since we already # pre-parsed the string as part of our And pre-parsing - loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) + loc, resultlist = self.exprs[0]._parse( + instring, loc, doActions, callPreParse=False + ) errorStop = False for e in self.exprs[1:]: if isinstance(e, And._ErrorStop): @@ -3378,34 +3814,36 @@ def parseImpl( self, instring, loc, doActions=True ): continue if errorStop: try: - loc, exprtokens = e._parse( instring, loc, doActions ) + loc, exprtokens = e._parse(instring, loc, doActions) except ParseSyntaxException: raise except ParseBaseException as pe: pe.__traceback__ = None raise ParseSyntaxException._from_exception(pe) except IndexError: - raise ParseSyntaxException(instring, len(instring), self.errmsg, self) + raise ParseSyntaxException( + instring, len(instring), self.errmsg, self + ) else: - loc, exprtokens = e._parse( instring, loc, doActions ) + loc, exprtokens = e._parse(instring, loc, doActions) if exprtokens or exprtokens.haskeys(): resultlist += exprtokens return loc, resultlist - def __iadd__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #And( [ self, other ] ) + def __iadd__(self, other): + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + return self.append(other) # And( [ self, other ] ) - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion( subRecCheckList ) + e.checkRecursion(subRecCheckList) if not e.mayReturnEmpty: break - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3422,26 +3860,27 @@ class Or(ParseExpression): Example:: # construct Or using '^' operator - + number = Word(nums) ^ Combine(Word(nums) + '.' 
+ Word(nums)) print(number.searchString("123 3.1416 789")) prints:: [['123'], ['3.1416'], ['789']] """ - def __init__( self, exprs, savelist = False ): - super(Or,self).__init__(exprs, savelist) + + def __init__(self, exprs, savelist=False): + super(Or, self).__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) else: self.mayReturnEmpty = True - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): maxExcLoc = -1 maxException = None matches = [] for e in self.exprs: try: - loc2 = e.tryParse( instring, loc ) + loc2 = e.tryParse(instring, loc) except ParseException as err: err.__traceback__ = None if err.loc > maxExcLoc: @@ -3449,7 +3888,9 @@ def parseImpl( self, instring, loc, doActions=True ): maxExcLoc = err.loc except IndexError: if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) + maxException = ParseException( + instring, len(instring), e.errmsg, self + ) maxExcLoc = len(instring) else: # save match among all matches, to retry longest to shortest @@ -3457,9 +3898,9 @@ def parseImpl( self, instring, loc, doActions=True ): if matches: matches.sort(key=lambda x: -x[0]) - for _,e in matches: + for _, e in matches: try: - return e._parse( instring, loc, doActions ) + return e._parse(instring, loc, doActions) except ParseException as err: err.__traceback__ = None if err.loc > maxExcLoc: @@ -3470,16 +3911,17 @@ def parseImpl( self, instring, loc, doActions=True ): maxException.msg = self.errmsg raise maxException else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - + raise ParseException( + instring, loc, "no defined alternatives to match", self + ) - def __ixor__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #Or( [ self, other ] ) + def __ixor__(self, other): + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + return self.append(other) # Or( [ self, other ] ) - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3487,10 +3929,10 @@ def __str__( self ): return self.strRepr - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion( subRecCheckList ) + e.checkRecursion(subRecCheckList) class MatchFirst(ParseExpression): @@ -3501,7 +3943,7 @@ class MatchFirst(ParseExpression): Example:: # construct MatchFirst using '|' operator - + # watch the order of expressions to match number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] @@ -3510,19 +3952,20 @@ class MatchFirst(ParseExpression): number = Combine(Word(nums) + '.' 
+ Word(nums)) | Word(nums) print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] """ - def __init__( self, exprs, savelist = False ): - super(MatchFirst,self).__init__(exprs, savelist) + + def __init__(self, exprs, savelist=False): + super(MatchFirst, self).__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) else: self.mayReturnEmpty = True - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): maxExcLoc = -1 maxException = None for e in self.exprs: try: - ret = e._parse( instring, loc, doActions ) + ret = e._parse(instring, loc, doActions) return ret except ParseException as err: if err.loc > maxExcLoc: @@ -3530,7 +3973,9 @@ def parseImpl( self, instring, loc, doActions=True ): maxExcLoc = err.loc except IndexError: if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) + maxException = ParseException( + instring, len(instring), e.errmsg, self + ) maxExcLoc = len(instring) # only got here if no expression matched, raise exception for match that made it the furthest @@ -3539,15 +3984,17 @@ def parseImpl( self, instring, loc, doActions=True ): maxException.msg = self.errmsg raise maxException else: - raise ParseException(instring, loc, "no defined alternatives to match", self) + raise ParseException( + instring, loc, "no defined alternatives to match", self + ) - def __ior__(self, other ): - if isinstance( other, basestring ): - other = ParserElement._literalStringClass( other ) - return self.append( other ) #MatchFirst( [ self, other ] ) + def __ior__(self, other): + if isinstance(other, basestring): + other = ParserElement._literalStringClass(other) + return self.append(other) # MatchFirst( [ self, other ] ) - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3555,10 +4002,10 @@ def __str__( self ): return self.strRepr - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion( subRecCheckList ) + e.checkRecursion(subRecCheckList) class Each(ParseExpression): @@ -3576,7 +4023,7 @@ class Each(ParseExpression): color_attr = "color:" + color("color") size_attr = "size:" + integer("size") - # use Each (using operator '&') to accept attributes in any order + # use Each (using operator '&') to accept attributes in any order # (shape and posn are required, color and size are optional) shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) @@ -3615,26 +4062,41 @@ class Each(ParseExpression): - shape: TRIANGLE - size: 20 """ - def __init__( self, exprs, savelist = True ): - super(Each,self).__init__(exprs, savelist) + + def __init__(self, exprs, savelist=True): + super(Each, self).__init__(exprs, savelist) self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) self.skipWhitespace = True self.initExprGroups = True - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if self.initExprGroups: - self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) - opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] - opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] + self.opt1map = dict( + (id(e.expr), e) 
for e in self.exprs if isinstance(e, Optional) + ) + opt1 = [e.expr for e in self.exprs if isinstance(e, Optional)] + opt2 = [ + e + for e in self.exprs + if e.mayReturnEmpty and not isinstance(e, Optional) + ] self.optionals = opt1 + opt2 - self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] - self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] - self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] + self.multioptionals = [ + e.expr for e in self.exprs if isinstance(e, ZeroOrMore) + ] + self.multirequired = [ + e.expr for e in self.exprs if isinstance(e, OneOrMore) + ] + self.required = [ + e + for e in self.exprs + if not isinstance(e, (Optional, ZeroOrMore, OneOrMore)) + ] self.required += self.multirequired self.initExprGroups = False tmpLoc = loc tmpReqd = self.required[:] - tmpOpt = self.optionals[:] + tmpOpt = self.optionals[:] matchOrder = [] keepMatching = True @@ -3643,11 +4105,11 @@ def parseImpl( self, instring, loc, doActions=True ): failed = [] for e in tmpExprs: try: - tmpLoc = e.tryParse( instring, tmpLoc ) + tmpLoc = e.tryParse(instring, tmpLoc) except ParseException: failed.append(e) else: - matchOrder.append(self.opt1map.get(id(e),e)) + matchOrder.append(self.opt1map.get(id(e), e)) if e in tmpReqd: tmpReqd.remove(e) elif e in tmpOpt: @@ -3657,21 +4119,25 @@ def parseImpl( self, instring, loc, doActions=True ): if tmpReqd: missing = ", ".join(_ustr(e) for e in tmpReqd) - raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) + raise ParseException( + instring, loc, "Missing one or more required elements (%s)" % missing + ) # add any unmatched Optionals, in case they have default values defined - matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] + matchOrder += [ + e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt + ] resultlist = [] for e in matchOrder: - loc,results = e._parse(instring,loc,doActions) + loc, results = e._parse(instring, loc, doActions) resultlist.append(results) finalResults = sum(resultlist, ParseResults([])) return loc, finalResults - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3679,19 +4145,20 @@ def __str__( self ): return self.strRepr - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion( subRecCheckList ) + e.checkRecursion(subRecCheckList) class ParseElementEnhance(ParserElement): """ Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. 
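The Or ('^', longest match) versus MatchFirst ('|', first match) difference documented above, in one runnable sketch (standalone pyparsing 2.x assumed)::

    from pyparsing import Combine, Word, nums

    real = Combine(Word(nums) + "." + Word(nums))
    print((Word(nums) ^ real).searchString("123 3.1416"))  # -> [['123'], ['3.1416']]
    print((Word(nums) | real).searchString("123 3.1416"))  # -> [['123'], ['3'], ['1416']]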
""" - def __init__( self, expr, savelist=False ): - super(ParseElementEnhance,self).__init__(savelist) - if isinstance( expr, basestring ): + + def __init__(self, expr, savelist=False): + super(ParseElementEnhance, self).__init__(savelist) + if isinstance(expr, basestring): if issubclass(ParserElement._literalStringClass, Token): expr = ParserElement._literalStringClass(expr) else: @@ -3701,64 +4168,64 @@ def __init__( self, expr, savelist=False ): if expr is not None: self.mayIndexError = expr.mayIndexError self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars( expr.whiteChars ) + self.setWhitespaceChars(expr.whiteChars) self.skipWhitespace = expr.skipWhitespace self.saveAsList = expr.saveAsList self.callPreparse = expr.callPreparse self.ignoreExprs.extend(expr.ignoreExprs) - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if self.expr is not None: - return self.expr._parse( instring, loc, doActions, callPreParse=False ) + return self.expr._parse(instring, loc, doActions, callPreParse=False) else: - raise ParseException("",loc,self.errmsg,self) + raise ParseException("", loc, self.errmsg, self) - def leaveWhitespace( self ): + def leaveWhitespace(self): self.skipWhitespace = False self.expr = self.expr.copy() if self.expr is not None: self.expr.leaveWhitespace() return self - def ignore( self, other ): - if isinstance( other, Suppress ): + def ignore(self, other): + if isinstance(other, Suppress): if other not in self.ignoreExprs: - super( ParseElementEnhance, self).ignore( other ) + super(ParseElementEnhance, self).ignore(other) if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) + self.expr.ignore(self.ignoreExprs[-1]) else: - super( ParseElementEnhance, self).ignore( other ) + super(ParseElementEnhance, self).ignore(other) if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) + self.expr.ignore(self.ignoreExprs[-1]) return self - def streamline( self ): - super(ParseElementEnhance,self).streamline() + def streamline(self): + super(ParseElementEnhance, self).streamline() if self.expr is not None: self.expr.streamline() return self - def checkRecursion( self, parseElementList ): + def checkRecursion(self, parseElementList): if self in parseElementList: - raise RecursiveGrammarException( parseElementList+[self] ) - subRecCheckList = parseElementList[:] + [ self ] + raise RecursiveGrammarException(parseElementList + [self]) + subRecCheckList = parseElementList[:] + [self] if self.expr is not None: - self.expr.checkRecursion( subRecCheckList ) + self.expr.checkRecursion(subRecCheckList) - def validate( self, validateTrace=[] ): - tmp = validateTrace[:]+[self] + def validate(self, validateTrace=[]): + tmp = validateTrace[:] + [self] if self.expr is not None: self.expr.validate(tmp) - self.checkRecursion( [] ) + self.checkRecursion([]) - def __str__( self ): + def __str__(self): try: - return super(ParseElementEnhance,self).__str__() + return super(ParseElementEnhance, self).__str__() except Exception: pass if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) + self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr)) return self.strRepr @@ -3774,17 +4241,18 @@ class FollowedBy(ParseElementEnhance): data_word = Word(alphas) label = data_word + FollowedBy(':') attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - + OneOrMore(attr_expr).parseString("shape: 
SQUARE color: BLACK posn: upper left").pprint() prints:: [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] """ - def __init__( self, expr ): - super(FollowedBy,self).__init__(expr) + + def __init__(self, expr): + super(FollowedBy, self).__init__(expr) self.mayReturnEmpty = True - def parseImpl( self, instring, loc, doActions=True ): - self.expr.tryParse( instring, loc ) + def parseImpl(self, instring, loc, doActions=True): + self.expr.tryParse(instring, loc) return loc, [] @@ -3797,22 +4265,25 @@ class NotAny(ParseElementEnhance): always returns a null token list. May be constructed using the '~' operator. Example:: - + """ - def __init__( self, expr ): - super(NotAny,self).__init__(expr) - #~ self.leaveWhitespace() - self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs + + def __init__(self, expr): + super(NotAny, self).__init__(expr) + # ~ self.leaveWhitespace() + self.skipWhitespace = ( + False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs + ) self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, "+_ustr(self.expr) + self.errmsg = "Found unwanted token, " + _ustr(self.expr) - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): if self.expr.canParseNext(instring, loc): raise ParseException(instring, loc, self.errmsg, self) return loc, [] - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3820,8 +4291,9 @@ def __str__( self ): return self.strRepr + class _MultipleMatch(ParseElementEnhance): - def __init__( self, expr, stopOn=None): + def __init__(self, expr, stopOn=None): super(_MultipleMatch, self).__init__(expr) self.saveAsList = True ender = stopOn @@ -3829,44 +4301,45 @@ def __init__( self, expr, stopOn=None): ender = ParserElement._literalStringClass(ender) self.not_ender = ~ender if ender is not None else None - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): self_expr_parse = self.expr._parse self_skip_ignorables = self._skipIgnorables check_ender = self.not_ender is not None if check_ender: try_not_ender = self.not_ender.tryParse - + # must be at least one (but first see if we are the stopOn sentinel; # if so, fail) if check_ender: try_not_ender(instring, loc) - loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False ) + loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False) try: - hasIgnoreExprs = (not not self.ignoreExprs) + hasIgnoreExprs = not not self.ignoreExprs while 1: if check_ender: try_not_ender(instring, loc) if hasIgnoreExprs: - preloc = self_skip_ignorables( instring, loc ) + preloc = self_skip_ignorables(instring, loc) else: preloc = loc - loc, tmptokens = self_expr_parse( instring, preloc, doActions ) + loc, tmptokens = self_expr_parse(instring, preloc, doActions) if tmptokens or tmptokens.haskeys(): tokens += tmptokens - except (ParseException,IndexError): + except (ParseException, IndexError): pass return loc, tokens - + + class OneOrMore(_MultipleMatch): """ Repetition of one or more of the given expression. 
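FollowedBy does non-consuming lookahead, per the docstring above (standalone pyparsing 2.x assumed)::

    from pyparsing import FollowedBy, Word, alphas

    label = Word(alphas) + FollowedBy(":")
    print(label.searchString("shape: SQUARE color: BLACK"))
    # -> [['shape'], ['color']]   (the ':' is checked but not consumed)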
- + Parameters: - expr - expression that must match one or more times - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) + (only required if the sentinel would ordinarily match the repetition + expression) Example:: data_word = Word(alphas) @@ -3879,13 +4352,13 @@ class OneOrMore(_MultipleMatch): # use stopOn attribute for OneOrMore to avoid reading label string as part of the data attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] - + # could also be written as (attr_expr * (1,)).parseString(text).pprint() """ - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3893,30 +4366,32 @@ def __str__( self ): return self.strRepr + class ZeroOrMore(_MultipleMatch): """ Optional repetition of zero or more of the given expression. - + Parameters: - expr - expression that must match zero or more times - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) + (only required if the sentinel would ordinarily match the repetition + expression) Example: similar to L{OneOrMore} """ - def __init__( self, expr, stopOn=None): - super(ZeroOrMore,self).__init__(expr, stopOn=stopOn) + + def __init__(self, expr, stopOn=None): + super(ZeroOrMore, self).__init__(expr, stopOn=stopOn) self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): + + def parseImpl(self, instring, loc, doActions=True): try: return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) - except (ParseException,IndexError): + except (ParseException, IndexError): return loc, [] - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3924,14 +4399,20 @@ def __str__( self ): return self.strRepr + class _NullToken(object): def __bool__(self): return False + __nonzero__ = __bool__ + def __str__(self): return "" + _optionalNotMatched = _NullToken() + + class Optional(ParseElementEnhance): """ Optional matching of the given expression. 
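The stopOn sentinel that keeps a repetition from swallowing its terminator, echoing the OneOrMore example above (standalone pyparsing 2.x assumed)::

    from pyparsing import Group, OneOrMore, Suppress, Word, alphas

    data_word = Word(alphas)
    label = data_word + Suppress(":")
    attr_expr = Group(label + OneOrMore(data_word, stopOn=label).setParseAction(" ".join))
    print(OneOrMore(attr_expr).parseString("shape: SQUARE posn: upper left"))
    # -> [['shape', 'SQUARE'], ['posn', 'upper left']]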
@@ -3946,10 +4427,10 @@ class Optional(ParseElementEnhance): zip.runTests(''' # traditional ZIP code 12345 - + # ZIP+4 form 12101-0001 - + # invalid ZIP 98765- ''') @@ -3967,28 +4448,29 @@ class Optional(ParseElementEnhance): ^ FAIL: Expected end of text (at char 5), (line:1, col:6) """ - def __init__( self, expr, default=_optionalNotMatched ): - super(Optional,self).__init__( expr, savelist=False ) + + def __init__(self, expr, default=_optionalNotMatched): + super(Optional, self).__init__(expr, savelist=False) self.saveAsList = self.expr.saveAsList self.defaultValue = default self.mayReturnEmpty = True - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - except (ParseException,IndexError): + loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) + except (ParseException, IndexError): if self.defaultValue is not _optionalNotMatched: if self.expr.resultsName: - tokens = ParseResults([ self.defaultValue ]) + tokens = ParseResults([self.defaultValue]) tokens[self.expr.resultsName] = self.defaultValue else: - tokens = [ self.defaultValue ] + tokens = [self.defaultValue] else: tokens = [] return loc, tokens - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name if self.strRepr is None: @@ -3996,18 +4478,19 @@ def __str__( self ): return self.strRepr + class SkipTo(ParseElementEnhance): """ Token for skipping over all undefined text until the matched expression is found. Parameters: - expr - target expression marking the end of the data to be skipped - - include - (default=C{False}) if True, the target expression is also parsed + - include - (default=C{False}) if True, the target expression is also parsed (the skipped text and target expression are returned as a 2-element list). 
- - ignore - (default=C{None}) used to define grammars (typically quoted strings and + - ignore - (default=C{None}) used to define grammars (typically quoted strings and comments) that might contain false matches to the target expression - - failOn - (default=C{None}) define expressions that are not allowed to be - included in the skipped test; if found before the target expression is found, + - failOn - (default=C{None}) define expressions that are not allowed to be + included in the skipped test; if found before the target expression is found, the SkipTo is not a match Example:: @@ -4027,11 +4510,11 @@ class SkipTo(ParseElementEnhance): # - parse action will call token.strip() for each matched token, i.e., the description body string_data = SkipTo(SEP, ignore=quotedString) string_data.setParseAction(tokenMap(str.strip)) - ticket_expr = (integer("issue_num") + SEP - + string_data("sev") + SEP - + string_data("desc") + SEP + ticket_expr = (integer("issue_num") + SEP + + string_data("sev") + SEP + + string_data("desc") + SEP + integer("days_open")) - + for tkt in ticket_expr.searchString(report): print tkt.dump() prints:: @@ -4051,8 +4534,9 @@ class SkipTo(ParseElementEnhance): - issue_num: 79 - sev: Minor """ - def __init__( self, other, include=False, ignore=None, failOn=None ): - super( SkipTo, self ).__init__( other ) + + def __init__(self, other, include=False, ignore=None, failOn=None): + super(SkipTo, self).__init__(other) self.ignoreExpr = ignore self.mayReturnEmpty = True self.mayIndexError = False @@ -4062,23 +4546,27 @@ def __init__( self, other, include=False, ignore=None, failOn=None ): self.failOn = ParserElement._literalStringClass(failOn) else: self.failOn = failOn - self.errmsg = "No match found for "+_ustr(self.expr) + self.errmsg = "No match found for " + _ustr(self.expr) - def parseImpl( self, instring, loc, doActions=True ): + def parseImpl(self, instring, loc, doActions=True): startloc = loc instrlen = len(instring) expr = self.expr expr_parse = self.expr._parse - self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None - self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None - + self_failOn_canParseNext = ( + self.failOn.canParseNext if self.failOn is not None else None + ) + self_ignoreExpr_tryParse = ( + self.ignoreExpr.tryParse if self.ignoreExpr is not None else None + ) + tmploc = loc while tmploc <= instrlen: if self_failOn_canParseNext is not None: # break if failOn expression matches if self_failOn_canParseNext(instring, tmploc): break - + if self_ignoreExpr_tryParse is not None: # advance past ignore expressions while 1: @@ -4086,7 +4574,7 @@ def parseImpl( self, instring, loc, doActions=True ): tmploc = self_ignoreExpr_tryParse(instring, tmploc) except ParseBaseException: break - + try: expr_parse(instring, tmploc, doActions=False, callPreParse=False) except (ParseException, IndexError): @@ -4104,13 +4592,14 @@ def parseImpl( self, instring, loc, doActions=True ): loc = tmploc skiptext = instring[startloc:loc] skipresult = ParseResults(skiptext) - + if self.includeMatch: - loc, mat = expr_parse(instring,loc,doActions,callPreParse=False) + loc, mat = expr_parse(instring, loc, doActions, callPreParse=False) skipresult += mat return loc, skipresult + class Forward(ParseElementEnhance): """ Forward declaration of an expression to be defined later - @@ -4130,45 +4619,46 @@ class Forward(ParseElementEnhance): See L{ParseResults.pprint} for an example of a recursive parser created using 
C{Forward}. """ - def __init__( self, other=None ): - super(Forward,self).__init__( other, savelist=False ) - def __lshift__( self, other ): - if isinstance( other, basestring ): + def __init__(self, other=None): + super(Forward, self).__init__(other, savelist=False) + + def __lshift__(self, other): + if isinstance(other, basestring): other = ParserElement._literalStringClass(other) self.expr = other self.strRepr = None self.mayIndexError = self.expr.mayIndexError self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars( self.expr.whiteChars ) + self.setWhitespaceChars(self.expr.whiteChars) self.skipWhitespace = self.expr.skipWhitespace self.saveAsList = self.expr.saveAsList self.ignoreExprs.extend(self.expr.ignoreExprs) return self - + def __ilshift__(self, other): return self << other - - def leaveWhitespace( self ): + + def leaveWhitespace(self): self.skipWhitespace = False return self - def streamline( self ): + def streamline(self): if not self.streamlined: self.streamlined = True if self.expr is not None: self.expr.streamline() return self - def validate( self, validateTrace=[] ): + def validate(self, validateTrace=[]): if self not in validateTrace: - tmp = validateTrace[:]+[self] + tmp = validateTrace[:] + [self] if self.expr is not None: self.expr.validate(tmp) self.checkRecursion([]) - def __str__( self ): - if hasattr(self,"name"): + def __str__(self): + if hasattr(self, "name"): return self.name return self.__class__.__name__ + ": ..." @@ -4186,24 +4676,28 @@ def __str__( self ): def copy(self): if self.expr is not None: - return super(Forward,self).copy() + return super(Forward, self).copy() else: ret = Forward() ret <<= self return ret + class _ForwardNoRecurse(Forward): - def __str__( self ): + def __str__(self): return "..." + class TokenConverter(ParseElementEnhance): """ Abstract subclass of C{ParseExpression}, for converting parsed results. """ - def __init__( self, expr, savelist=False ): - super(TokenConverter,self).__init__( expr )#, savelist ) + + def __init__(self, expr, savelist=False): + super(TokenConverter, self).__init__(expr) # , savelist ) self.saveAsList = False + class Combine(TokenConverter): """ Converter to concatenate all matching tokens to a single string. @@ -4221,8 +4715,9 @@ class Combine(TokenConverter): # no match when there are internal spaces print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) 
""" - def __init__( self, expr, joinString="", adjacent=True ): - super(Combine,self).__init__( expr ) + + def __init__(self, expr, joinString="", adjacent=True): + super(Combine, self).__init__(expr) # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself if adjacent: self.leaveWhitespace() @@ -4231,23 +4726,26 @@ def __init__( self, expr, joinString="", adjacent=True ): self.joinString = joinString self.callPreparse = True - def ignore( self, other ): + def ignore(self, other): if self.adjacent: ParserElement.ignore(self, other) else: - super( Combine, self).ignore( other ) + super(Combine, self).ignore(other) return self - def postParse( self, instring, loc, tokenlist ): + def postParse(self, instring, loc, tokenlist): retToks = tokenlist.copy() del retToks[:] - retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) + retToks += ParseResults( + ["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults + ) if self.resultsName and retToks.haskeys(): - return [ retToks ] + return [retToks] else: return retToks + class Group(TokenConverter): """ Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. @@ -4262,12 +4760,14 @@ class Group(TokenConverter): func = ident + Group(Optional(delimitedList(term))) print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']] """ - def __init__( self, expr ): - super(Group,self).__init__( expr ) + + def __init__(self, expr): + super(Group, self).__init__(expr) self.saveAsList = True - def postParse( self, instring, loc, tokenlist ): - return [ tokenlist ] + def postParse(self, instring, loc, tokenlist): + return [tokenlist] + class Dict(TokenConverter): """ @@ -4282,16 +4782,16 @@ class Dict(TokenConverter): text = "shape: SQUARE posn: upper left color: light blue texture: burlap" attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - + # print attributes as plain groups print(OneOrMore(attr_expr).parseString(text).dump()) - + # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names result = Dict(OneOrMore(Group(attr_expr))).parseString(text) print(result.dump()) - + # access named fields as dict entries, or output as dict - print(result['shape']) + print(result['shape']) print(result.asDict()) prints:: ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] @@ -4305,31 +4805,34 @@ class Dict(TokenConverter): {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} See more examples at L{ParseResults} of accessing fields by results name. 
""" - def __init__( self, expr ): - super(Dict,self).__init__( expr ) + + def __init__(self, expr): + super(Dict, self).__init__(expr) self.saveAsList = True - def postParse( self, instring, loc, tokenlist ): - for i,tok in enumerate(tokenlist): + def postParse(self, instring, loc, tokenlist): + for i, tok in enumerate(tokenlist): if len(tok) == 0: continue ikey = tok[0] - if isinstance(ikey,int): + if isinstance(ikey, int): ikey = _ustr(tok[0]).strip() - if len(tok)==1: - tokenlist[ikey] = _ParseResultsWithOffset("",i) - elif len(tok)==2 and not isinstance(tok[1],ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) + if len(tok) == 1: + tokenlist[ikey] = _ParseResultsWithOffset("", i) + elif len(tok) == 2 and not isinstance(tok[1], ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) else: - dictvalue = tok.copy() #ParseResults(i) + dictvalue = tok.copy() # ParseResults(i) del dictvalue[0] - if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) + if len(dictvalue) != 1 or ( + isinstance(dictvalue, ParseResults) and dictvalue.haskeys() + ): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) if self.resultsName: - return [ tokenlist ] + return [tokenlist] else: return tokenlist @@ -4353,10 +4856,11 @@ class Suppress(TokenConverter): ['a', 'b', 'c', 'd'] (See also L{delimitedList}.) """ - def postParse( self, instring, loc, tokenlist ): + + def postParse(self, instring, loc, tokenlist): return [] - def suppress( self ): + def suppress(self): return self @@ -4364,22 +4868,26 @@ class OnlyOnce(object): """ Wrapper for parse actions, to ensure they are only called once. """ + def __init__(self, methodCall): self.callable = _trim_arity(methodCall) self.called = False - def __call__(self,s,l,t): + + def __call__(self, s, l, t): if not self.called: - results = self.callable(s,l,t) + results = self.callable(s, l, t) self.called = True return results - raise ParseException(s,l,"") + raise ParseException(s, l, "") + def reset(self): self.called = False + def traceParseAction(f): """ - Decorator for debugging parse actions. - + Decorator for debugging parse actions. + When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. @@ -4398,29 +4906,34 @@ def remove_duplicate_chars(tokens): ['dfjkls'] """ f = _trim_arity(f) + def z(*paArgs): thisFunc = f.__name__ - s,l,t = paArgs[-3:] - if len(paArgs)>3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc - sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) + s, l, t = paArgs[-3:] + if len(paArgs) > 3: + thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc + sys.stderr.write( + ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc, line(l, s), l, t) + ) try: ret = f(*paArgs) except Exception as exc: - sys.stderr.write( "< ['aa', 'bb', 'cc'] delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] """ - dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." + dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..." 
if combine: - return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) + return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName) else: - return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) + return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName) -def countedArray( expr, intExpr=None ): + +def countedArray(expr, intExpr=None): """ Helper to define a counted list of expressions. This helper defines a pattern of the form:: integer expr expr expr... where the leading integer tells how many expr expressions follow. The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. - + If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. Example:: @@ -4458,27 +4972,31 @@ def countedArray( expr, intExpr=None ): countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] """ arrayExpr = Forward() - def countFieldParseAction(s,l,t): + + def countFieldParseAction(s, l, t): n = t[0] - arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) + arrayExpr << (n and Group(And([expr] * n)) or Group(empty)) return [] + if intExpr is None: - intExpr = Word(nums).setParseAction(lambda t:int(t[0])) + intExpr = Word(nums).setParseAction(lambda t: int(t[0])) else: intExpr = intExpr.copy() intExpr.setName("arrayLen") intExpr.addParseAction(countFieldParseAction, callDuringTry=True) - return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') + return (intExpr + arrayExpr).setName("(len) " + _ustr(expr) + "...") + def _flatten(L): ret = [] for i in L: - if isinstance(i,list): + if isinstance(i, list): ret.extend(_flatten(i)) else: ret.append(i) return ret + def matchPreviousLiteral(expr): """ Helper to define an expression that is indirectly defined from @@ -4493,7 +5011,8 @@ def matchPreviousLiteral(expr): Do I{not} use with packrat parsing enabled. 
""" rep = Forward() - def copyTokenToRepeater(s,l,t): + + def copyTokenToRepeater(s, l, t): if t: if len(t) == 1: rep << t[0] @@ -4503,10 +5022,12 @@ def copyTokenToRepeater(s,l,t): rep << And(Literal(tt) for tt in tflat) else: rep << Empty() + expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) + rep.setName("(prev) " + _ustr(expr)) return rep + def matchPreviousExpr(expr): """ Helper to define an expression that is indirectly defined from @@ -4524,26 +5045,32 @@ def matchPreviousExpr(expr): rep = Forward() e2 = expr.copy() rep <<= e2 - def copyTokenToRepeater(s,l,t): + + def copyTokenToRepeater(s, l, t): matchTokens = _flatten(t.asList()) - def mustMatchTheseTokens(s,l,t): + + def mustMatchTheseTokens(s, l, t): theseTokens = _flatten(t.asList()) - if theseTokens != matchTokens: - raise ParseException("",0,"") - rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) + if theseTokens != matchTokens: + raise ParseException("", 0, "") + + rep.setParseAction(mustMatchTheseTokens, callDuringTry=True) + expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) + rep.setName("(prev) " + _ustr(expr)) return rep + def _escapeRegexRangeChars(s): - #~ escape these chars: ^-] + # ~ escape these chars: ^-] for c in r"\^-]": - s = s.replace(c,_bslash+c) - s = s.replace("\n",r"\n") - s = s.replace("\t",r"\t") + s = s.replace(c, _bslash + c) + s = s.replace("\n", r"\n") + s = s.replace("\t", r"\t") return _ustr(s) -def oneOf( strs, caseless=False, useRegex=True ): + +def oneOf(strs, caseless=False, useRegex=True): """ Helper to quickly define a set of alternative Literals, and makes sure to do longest-first testing when there is a conflict, regardless of the input order, @@ -4567,56 +5094,68 @@ def oneOf( strs, caseless=False, useRegex=True ): [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] """ if caseless: - isequal = ( lambda a,b: a.upper() == b.upper() ) - masks = ( lambda a,b: b.upper().startswith(a.upper()) ) + isequal = lambda a, b: a.upper() == b.upper() + masks = lambda a, b: b.upper().startswith(a.upper()) parseElementClass = CaselessLiteral else: - isequal = ( lambda a,b: a == b ) - masks = ( lambda a,b: b.startswith(a) ) + isequal = lambda a, b: a == b + masks = lambda a, b: b.startswith(a) parseElementClass = Literal symbols = [] - if isinstance(strs,basestring): + if isinstance(strs, basestring): symbols = strs.split() elif isinstance(strs, collections.Iterable): symbols = list(strs) else: - warnings.warn("Invalid argument to oneOf, expected string or iterable", - SyntaxWarning, stacklevel=2) + warnings.warn( + "Invalid argument to oneOf, expected string or iterable", + SyntaxWarning, + stacklevel=2, + ) if not symbols: return NoMatch() i = 0 - while i < len(symbols)-1: + while i < len(symbols) - 1: cur = symbols[i] - for j,other in enumerate(symbols[i+1:]): - if ( isequal(other, cur) ): - del symbols[i+j+1] + for j, other in enumerate(symbols[i + 1 :]): + if isequal(other, cur): + del symbols[i + j + 1] break - elif ( masks(cur, other) ): - del symbols[i+j+1] - symbols.insert(i,other) + elif masks(cur, other): + del symbols[i + j + 1] + symbols.insert(i, other) cur = other break else: i += 1 if not caseless and useRegex: - #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) + # ~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) try: - if len(symbols)==len("".join(symbols)): - return Regex( "[%s]" % 
"".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols)) + if len(symbols) == len("".join(symbols)): + return Regex( + "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) + ).setName(" | ".join(symbols)) else: - return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols)) + return Regex("|".join(re.escape(sym) for sym in symbols)).setName( + " | ".join(symbols) + ) except Exception: - warnings.warn("Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, stacklevel=2) - + warnings.warn( + "Exception creating Regex for oneOf, building MatchFirst", + SyntaxWarning, + stacklevel=2, + ) # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) + return MatchFirst(parseElementClass(sym) for sym in symbols).setName( + " | ".join(symbols) + ) + -def dictOf( key, value ): +def dictOf(key, value): """ Helper to easily and clearly define a dictionary by specifying the respective patterns for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens @@ -4629,7 +5168,7 @@ def dictOf( key, value ): text = "shape: SQUARE posn: upper left color: light blue texture: burlap" attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) print(OneOrMore(attr_expr).parseString(text).dump()) - + attr_label = label attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) @@ -4649,18 +5188,19 @@ def dictOf( key, value ): SQUARE {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} """ - return Dict( ZeroOrMore( Group ( key + value ) ) ) + return Dict(ZeroOrMore(Group(key + value))) + def originalTextFor(expr, asString=True): """ Helper to return the original, untokenized text for a given expression. Useful to restore the parsed fields of an HTML start tag into the raw tag text itself, or to revert separate tokens with intervening whitespace back to the original matching - input text. By default, returns astring containing the original parsed text. - - If the optional C{asString} argument is passed as C{False}, then the return value is a - C{L{ParseResults}} containing any results names that were originally matched, and a - single token containing the original matched text from the input string. So if + input text. By default, returns astring containing the original parsed text. + + If the optional C{asString} argument is passed as C{False}, then the return value is a + C{L{ParseResults}} containing any results names that were originally matched, and a + single token containing the original matched text from the input string. So if the expression passed to C{L{originalTextFor}} contains expressions with defined results names, you must set C{asString} to C{False} if you want to preserve those results name values. 
@@ -4675,25 +5215,29 @@ def originalTextFor(expr, asString=True): [' bold text '] ['text'] """ - locMarker = Empty().setParseAction(lambda s,loc,t: loc) + locMarker = Empty().setParseAction(lambda s, loc, t: loc) endlocMarker = locMarker.copy() endlocMarker.callPreparse = False matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") if asString: - extractText = lambda s,l,t: s[t._original_start:t._original_end] + extractText = lambda s, l, t: s[t._original_start : t._original_end] else: - def extractText(s,l,t): - t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]] + + def extractText(s, l, t): + t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]] + matchExpr.setParseAction(extractText) matchExpr.ignoreExprs = expr.ignoreExprs return matchExpr -def ungroup(expr): + +def ungroup(expr): """ Helper to undo pyparsing's default grouping of And expressions, even if all but one are non-empty. """ - return TokenConverter(expr).setParseAction(lambda t:t[0]) + return TokenConverter(expr).setParseAction(lambda t: t[0]) + def locatedExpr(expr): """ @@ -4715,23 +5259,45 @@ def locatedExpr(expr): [[8, 'lksdjjf', 15]] [[18, 'lkkjj', 23]] """ - locator = Empty().setParseAction(lambda s,l,t: l) - return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) + locator = Empty().setParseAction(lambda s, l, t: l) + return Group( + locator("locn_start") + + expr("value") + + locator.copy().leaveWhitespace()("locn_end") + ) # convenience constants for positional expressions -empty = Empty().setName("empty") -lineStart = LineStart().setName("lineStart") -lineEnd = LineEnd().setName("lineEnd") +empty = Empty().setName("empty") +lineStart = LineStart().setName("lineStart") +lineEnd = LineEnd().setName("lineEnd") stringStart = StringStart().setName("stringStart") -stringEnd = StringEnd().setName("stringEnd") - -_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) -_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16))) -_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) -_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE) +stringEnd = StringEnd().setName("stringEnd") + +_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction( + lambda s, l, t: t[0][1] +) +_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction( + lambda s, l, t: unichr(int(t[0].lstrip(r"\0x"), 16)) +) +_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction( + lambda s, l, t: unichr(int(t[0][1:], 8)) +) +_singleChar = ( + _escapedPunc + | _escapedHexChar + | _escapedOctChar + | Word(printables, excludeChars=r"\]", exact=1) + | Regex(r"\w", re.UNICODE) +) _charRange = Group(_singleChar + Suppress("-") + _singleChar) -_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" +_reBracketExpr = ( + Literal("[") + + Optional("^").setResultsName("negate") + + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + + "]" +) + def srange(s): r""" @@ -4745,28 +5311,36 @@ def srange(s): The values enclosed in the []'s may be: - a single character - an escaped character with a leading backslash (such as C{\-} or C{\]}) - - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) - (C{\0x##} is also supported 
for backwards compatibility) + - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) + (C{\0x##} is also supported for backwards compatibility) - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) - a range of any of the above, separated by a dash (C{'a-z'}, etc.) - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) """ - _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) + _expanded = ( + lambda p: p + if not isinstance(p, ParseResults) + else "".join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) + ) try: return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) except Exception: return "" + def matchOnlyAtCol(n): """ Helper method for defining parse actions that require matching at a specific column in the input text. """ - def verifyCol(strg,locn,toks): - if col(locn,strg) != n: - raise ParseException(strg,locn,"matched token not at column %d" % n) + + def verifyCol(strg, locn, toks): + if col(locn, strg) != n: + raise ParseException(strg, locn, "matched token not at column %d" % n) + return verifyCol + def replaceWith(replStr): """ Helper method for common parse actions that simply return a literal value. Especially @@ -4776,12 +5350,13 @@ def replaceWith(replStr): num = Word(nums).setParseAction(lambda toks: int(toks[0])) na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) term = na | num - + OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] """ - return lambda s,l,t: [replStr] + return lambda s, l, t: [replStr] + -def removeQuotes(s,l,t): +def removeQuotes(s, l, t): """ Helper parse action for removing quotation marks from parsed quoted strings. @@ -4795,9 +5370,10 @@ def removeQuotes(s,l,t): """ return t[0][1:-1] + def tokenMap(func, *args): """ - Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional + Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional args are passed, they are forwarded to the given function as additional arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the parsed data to an integer using base 16. @@ -4807,7 +5383,7 @@ def tokenMap(func, *args): hex_ints.runTests(''' 00 11 22 aa FF 0a 0d 1a ''') - + upperword = Word(alphas).setParseAction(tokenMap(str.upper)) OneOrMore(upperword).runTests(''' my kingdom for a horse @@ -4827,53 +5403,80 @@ def tokenMap(func, *args): now is the winter of our discontent made glorious summer by this sun of york ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] """ - def pa(s,l,t): + + def pa(s, l, t): return [func(tokn, *args) for tokn in t] try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) + func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) except Exception: func_name = str(func) pa.__name__ = func_name return pa + upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) """(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}""" downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) """(Deprecated) Helper parse action to convert tokens to lower case. 
Deprecated in favor of L{pyparsing_common.downcaseTokens}""" - + + def _makeTags(tagStr, xml): """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr,basestring): + if isinstance(tagStr, basestring): resname = tagStr tagStr = Keyword(tagStr, caseless=not xml) else: resname = tagStr.name - tagAttrName = Word(alphas,alphanums+"_-:") - if (xml): - tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + tagAttrName = Word(alphas, alphanums + "_-:") + if xml: + tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes) + openTag = ( + Suppress("<") + + tagStr("tag") + + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) + + Optional("/", default=[False]) + .setResultsName("empty") + .setParseAction(lambda s, l, t: t[0] == "/") + + Suppress(">") + ) else: printablesLessRAbrack = "".join(c for c in printables if c not in ">") - tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) - openTag = Suppress("<") + tagStr("tag") + \ - Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ - Optional( Suppress("=") + tagAttrValue ) ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word( + printablesLessRAbrack + ) + openTag = ( + Suppress("<") + + tagStr("tag") + + Dict( + ZeroOrMore( + Group( + tagAttrName.setParseAction(downcaseTokens) + + Optional(Suppress("=") + tagAttrValue) + ) + ) + ) + + Optional("/", default=[False]) + .setResultsName("empty") + .setParseAction(lambda s, l, t: t[0] == "/") + + Suppress(">") + ) closeTag = Combine(_L("") - openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname) - closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % resname) + openTag = openTag.setResultsName( + "start" + "".join(resname.replace(":", " ").title().split()) + ).setName("<%s>" % resname) + closeTag = closeTag.setResultsName( + "end" + "".join(resname.replace(":", " ").title().split()) + ).setName("" % resname) openTag.tag = resname closeTag.tag = resname return openTag, closeTag + def makeHTMLTags(tagStr): """ Helper to construct opening and closing tag expressions for HTML, given a tag name. 
Matches @@ -4884,14 +5487,15 @@ def makeHTMLTags(tagStr): # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple a,a_end = makeHTMLTags("A") link_expr = a + SkipTo(a_end)("link_text") + a_end - + for link in link_expr.searchString(text): # attributes in the tag (like "href" shown here) are also accessible as named results print(link.link_text, '->', link.href) prints:: pyparsing -> http://pyparsing.wikispaces.com """ - return _makeTags( tagStr, False ) + return _makeTags(tagStr, False) + def makeXMLTags(tagStr): """ @@ -4900,9 +5504,10 @@ def makeXMLTags(tagStr): Example: similar to L{makeHTMLTags} """ - return _makeTags( tagStr, True ) + return _makeTags(tagStr, True) + -def withAttribute(*args,**attrDict): +def withAttribute(*args, **attrDict): """ Helper to create a validating parse action to be used with start tags created with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag @@ -4917,7 +5522,7 @@ def withAttribute(*args,**attrDict): - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) For attribute names with a namespace prefix, you must use the second form. Attribute names are matched insensitive to upper/lower case. - + If just testing for C{class} (with or without a namespace), use C{L{withClass}}. To verify that the attribute exists, but without specifying a value, pass @@ -4931,7 +5536,7 @@ def withAttribute(*args,**attrDict):
<div type="graph">1,3 2,3 1,1</div>
<div>this has no type</div>
</div>
- + ''' div,div_end = makeHTMLTags("div") @@ -4940,7 +5545,7 @@ def withAttribute(*args,**attrDict): grid_expr = div_grid + SkipTo(div | div_end)("body") for grid_header in grid_expr.searchString(html): print(grid_header.body) - + # construct a match with any div tag having a type attribute, regardless of the value div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) div_expr = div_any_type + SkipTo(div | div_end)("body") @@ -4956,18 +5561,27 @@ def withAttribute(*args,**attrDict): attrs = args[:] else: attrs = attrDict.items() - attrs = [(k,v) for k,v in attrs] - def pa(s,l,tokens): - for attrName,attrValue in attrs: + attrs = [(k, v) for k, v in attrs] + + def pa(s, l, tokens): + for attrName, attrValue in attrs: if attrName not in tokens: - raise ParseException(s,l,"no matching attribute " + attrName) + raise ParseException(s, l, "no matching attribute " + attrName) if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % - (attrName, tokens[attrName], attrValue)) + raise ParseException( + s, + l, + "attribute '%s' has value '%s', must be '%s'" + % (attrName, tokens[attrName], attrValue), + ) + return pa + + withAttribute.ANY_VALUE = object() -def withClass(classname, namespace=''): + +def withClass(classname, namespace=""): """ Simplified version of C{L{withAttribute}} when matching on a div class - made difficult because C{class} is a reserved word in Python. @@ -4980,15 +5594,15 @@ def withClass(classname, namespace=''):
<div class="graph">1,3 2,3 1,1</div>
<div>this &lt;div&gt; has no class</div>
</div>
- + ''' div,div_end = makeHTMLTags("div") div_grid = div().setParseAction(withClass("grid")) - + grid_expr = div_grid + SkipTo(div | div_end)("body") for grid_header in grid_expr.searchString(html): print(grid_header.body) - + div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) div_expr = div_any_type + SkipTo(div | div_end)("body") for div_header in div_expr.searchString(html): @@ -5000,20 +5614,22 @@ def withClass(classname, namespace=''): 1,3 2,3 1,1 """ classattr = "%s:class" % namespace if namespace else "class" - return withAttribute(**{classattr : classname}) + return withAttribute(**{classattr: classname}) + opAssoc = _Constants() opAssoc.LEFT = object() opAssoc.RIGHT = object() -def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): + +def infixNotation(baseExpr, opList, lpar=Suppress("("), rpar=Suppress(")")): """ Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached - to operator expressions. The generated parser will also recognize the use + to operator expressions. The generated parser will also recognize the use of parentheses to override operator precedences (see example below). - + Note: if you define a deep operator list, you may see performance issues when using infixNotation. See L{ParserElement.enablePackrat} for a mechanism to potentially improve your parser performance. @@ -5043,15 +5659,15 @@ def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): Example:: # simple example of four-function arithmetic with ints and variable names integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - + varname = pyparsing_common.identifier + arith_expr = infixNotation(integer | varname, [ ('-', 1, opAssoc.RIGHT), (oneOf('* /'), 2, opAssoc.LEFT), (oneOf('+ -'), 2, opAssoc.LEFT), ]) - + arith_expr.runTests(''' 5+3*6 (5+3)*6 @@ -5068,44 +5684,64 @@ def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): [[['-', 2], '-', ['-', 11]]] """ ret = Forward() - lastExpr = baseExpr | ( lpar + ret + rpar ) - for i,operDef in enumerate(opList): - opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] + lastExpr = baseExpr | (lpar + ret + rpar) + for i, operDef in enumerate(opList): + opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr if arity == 3: if opExpr is None or len(opExpr) != 2: - raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") + raise ValueError( + "if numterms=3, opExpr must be a tuple or list of two expressions" + ) opExpr1, opExpr2 = opExpr thisExpr = Forward().setName(termName) if rightLeftAssoc == opAssoc.LEFT: if arity == 1: - matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) + matchExpr = FollowedBy(lastExpr + opExpr) + Group( + lastExpr + OneOrMore(opExpr) + ) elif arity == 2: if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( + lastExpr + OneOrMore(opExpr + lastExpr) + ) else: - matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) + matchExpr = FollowedBy(lastExpr + lastExpr) + Group( + lastExpr + OneOrMore(lastExpr) + ) elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr 
+ opExpr2 + lastExpr) + \ - Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) + matchExpr = FollowedBy( + lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr + ) + Group(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + raise ValueError( + "operator must be unary (1), binary (2), or ternary (3)" + ) elif rightLeftAssoc == opAssoc.RIGHT: if arity == 1: # try to avoid LR with this extra test if not isinstance(opExpr, Optional): opExpr = Optional(opExpr) - matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) + matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( + opExpr + thisExpr + ) elif arity == 2: if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( + lastExpr + OneOrMore(opExpr + thisExpr) + ) else: - matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) + matchExpr = FollowedBy(lastExpr + thisExpr) + Group( + lastExpr + OneOrMore(thisExpr) + ) elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ - Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) + matchExpr = FollowedBy( + lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr + ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + raise ValueError( + "operator must be unary (1), binary (2), or ternary (3)" + ) else: raise ValueError("operator must indicate right or left associativity") if pa: @@ -5113,19 +5749,27 @@ def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): matchExpr.setParseAction(*pa) else: matchExpr.setParseAction(pa) - thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) + thisExpr <<= matchExpr.setName(termName) | lastExpr lastExpr = thisExpr ret <<= lastExpr return ret + operatorPrecedence = infixNotation """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release.""" -dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes") -sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes") -quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'| - Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") +dblQuotedString = Combine( + Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' +).setName("string enclosed in double quotes") +sglQuotedString = Combine( + Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" +).setName("string enclosed in single quotes") +quotedString = Combine( + Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' + | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" +).setName("quotedString using single or double quotes") +unicodeString = Combine(_L("u") + quotedString.copy()).setName("unicode string literal") + def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): """ @@ -5159,23 +5803,23 @@ def nestedExpr(opener="(", closer=")", content=None, 
ignoreExpr=quotedString.cop code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) - c_function = (decl_data_type("type") + c_function = (decl_data_type("type") + ident("name") - + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + code_body("body")) c_function.ignore(cStyleComment) - + source_code = ''' - int is_odd(int x) { - return (x%2); + int is_odd(int x) { + return (x%2); } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { + + int dec_to_hex(char hchar) { + if (hchar >= '0' && hchar <= '9') { + return (ord(hchar)-ord('0')); + } else { return (10+ord(hchar)-ord('A')); - } + } } ''' for func in c_function.searchString(source_code): @@ -5188,35 +5832,56 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: - if isinstance(opener,basestring) and isinstance(closer,basestring): - if len(opener) == 1 and len(closer)==1: + if isinstance(opener, basestring) and isinstance(closer, basestring): + if len(opener) == 1 and len(closer) == 1: if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) + content = Combine( + OneOrMore( + ~ignoreExpr + + CharsNotIn( + opener + closer + ParserElement.DEFAULT_WHITE_CHARS, + exact=1, + ) + ) + ).setParseAction(lambda t: t[0].strip()) else: - content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS - ).setParseAction(lambda t:t[0].strip())) + content = empty.copy() + CharsNotIn( + opener + closer + ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t: t[0].strip()) else: if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - ~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) + content = Combine( + OneOrMore( + ~ignoreExpr + + ~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) + ) + ).setParseAction(lambda t: t[0].strip()) else: - content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + - CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) + content = Combine( + OneOrMore( + ~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) + ) + ).setParseAction(lambda t: t[0].strip()) else: - raise ValueError("opening and closing arguments must be strings if no content expression is given") + raise ValueError( + "opening and closing arguments must be strings if no content expression" + " is given" + ) ret = Forward() if ignoreExpr is not None: - ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) + ret <<= Group( + Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer) + ) else: - ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) - ret.setName('nested %s%s expression' % (opener,closer)) + ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) + ret.setName("nested %s%s expression" % (opener, closer)) return ret + def indentedBlock(blockStatementExpr, indentStack, indent=True): """ Helper method for defining space-delimited indentation blocks, such as @@ 
-5293,56 +5958,75 @@ def eggs(z): 'spam', ['(', 'x', 'y', ')'], ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] + [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] """ - def checkPeerIndent(s,l,t): - if l >= len(s): return - curCol = col(l,s) + + def checkPeerIndent(s, l, t): + if l >= len(s): + return + curCol = col(l, s) if curCol != indentStack[-1]: if curCol > indentStack[-1]: - raise ParseFatalException(s,l,"illegal nesting") - raise ParseException(s,l,"not a peer entry") + raise ParseFatalException(s, l, "illegal nesting") + raise ParseException(s, l, "not a peer entry") - def checkSubIndent(s,l,t): - curCol = col(l,s) + def checkSubIndent(s, l, t): + curCol = col(l, s) if curCol > indentStack[-1]: - indentStack.append( curCol ) + indentStack.append(curCol) else: - raise ParseException(s,l,"not a subentry") - - def checkUnindent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): - raise ParseException(s,l,"not an unindent") + raise ParseException(s, l, "not a subentry") + + def checkUnindent(s, l, t): + if l >= len(s): + return + curCol = col(l, s) + if not (indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + raise ParseException(s, l, "not an unindent") indentStack.pop() NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) - INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') - PEER = Empty().setParseAction(checkPeerIndent).setName('') - UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') + INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName("INDENT") + PEER = Empty().setParseAction(checkPeerIndent).setName("") + UNDENT = Empty().setParseAction(checkUnindent).setName("UNINDENT") if indent: - smExpr = Group( Optional(NL) + - #~ FollowedBy(blockStatementExpr) + - INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) + smExpr = Group( + Optional(NL) + + + # ~ FollowedBy(blockStatementExpr) + + INDENT + + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))) + + UNDENT + ) else: - smExpr = Group( Optional(NL) + - (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) + smExpr = Group( + Optional(NL) + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))) + ) blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.setName('indented block') + return smExpr.setName("indented block") + alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") -anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag')) -_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\'')) -commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") +anyOpenTag, anyCloseTag = makeHTMLTags( + Word(alphas, alphanums + "_:").setName("any tag") +) +_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), "><& \"'")) +commonHTMLEntity = Regex( + "&(?P" + "|".join(_htmlEntityMap.keys()) + ");" +).setName("common HTML entity") + + def replaceHTMLEntity(t): """Helper parser action to replace common HTML entities with their special characters""" return _htmlEntityMap.get(t.entity) + # it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") +cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + 
"*/").setName( + "C style comment" +) "Comment of the form C{/* ... */}" htmlComment = Regex(r"").setName("HTML comment") @@ -5352,7 +6036,9 @@ def replaceHTMLEntity(t): dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") "Comment of the form C{// ... (to end of line)}" -cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") +cppStyleComment = Combine( + Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment +).setName("C++ style comment") "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" javaStyleComment = cppStyleComment @@ -5361,10 +6047,19 @@ def replaceHTMLEntity(t): pythonStyleComment = Regex(r"#.*").setName("Python style comment") "Comment of the form C{# ... (to end of line)}" -_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + - Optional( Word(" \t") + - ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") -commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") +_commasepitem = ( + Combine( + OneOrMore( + Word(printables, excludeChars=",") + + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()) + ) + ) + .streamline() + .setName("commaItem") +) +commaSeparatedList = delimitedList( + Optional(quotedString.copy() | _commasepitem, default="") +).setName("commaSeparatedList") """(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" @@ -5518,48 +6213,80 @@ class pyparsing_common: integer = Word(nums).setName("integer").setParseAction(convertToInteger) """expression that parses an unsigned integer, returns an int""" - hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) + hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16)) """expression that parses a hexadecimal integer, returns an int""" - signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) + signed_integer = ( + Regex(r"[+-]?\d+").setName("signed integer").setParseAction(convertToInteger) + ) """expression that parses an integer with optional leading sign, returns an int""" - fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") + fraction = ( + signed_integer().setParseAction(convertToFloat) + + "/" + + signed_integer().setParseAction(convertToFloat) + ).setName("fraction") """fractional expression of an integer divided by an integer, returns a float""" - fraction.addParseAction(lambda t: t[0]/t[-1]) + fraction.addParseAction(lambda t: t[0] / t[-1]) - mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") + mixed_integer = ( + fraction | signed_integer + Optional(Optional("-").suppress() + fraction) + ).setName("fraction or mixed integer-fraction") """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" mixed_integer.addParseAction(sum) - real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) + real = Regex(r"[+-]?\d+\.\d*").setName("real number").setParseAction(convertToFloat) """expression that parses a floating point number and returns a float""" - sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific 
notation").setParseAction(convertToFloat) + sci_real = ( + Regex(r"[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)") + .setName("real number with scientific notation") + .setParseAction(convertToFloat) + ) """expression that parses a floating point number with optional scientific notation and returns a float""" # streamlining this expression makes the docs nicer-looking number = (sci_real | real | signed_integer).streamline() """any numeric expression, returns the corresponding Python type""" - fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) + fnumber = ( + Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") + .setName("fnumber") + .setParseAction(convertToFloat) + ) """any int or real number, returned as float""" - - identifier = Word(alphas+'_', alphanums+'_').setName("identifier") + + identifier = Word(alphas + "_", alphanums + "_").setName("identifier") """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") + + ipv4_address = Regex( + r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}" + ).setName("IPv4 address") "IPv4 address (C{0.0.0.0 - 255.255.255.255})" - _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") - _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") - _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") - _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) + _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").setName("hex_integer") + _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).setName( + "full IPv6 address" + ) + _short_ipv6_address = ( + Optional(_ipv6_part + (":" + _ipv6_part) * (0, 6)) + + "::" + + Optional(_ipv6_part + (":" + _ipv6_part) * (0, 6)) + ).setName("short IPv6 address") + _short_ipv6_address.addCondition( + lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8 + ) _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") - ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") + ipv6_address = Combine( + (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName( + "IPv6 address" + ) + ).setName("IPv6 address") "IPv6 address (long, short, or mixed form)" - - mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") + + mac_address = Regex( + r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}" + ).setName("MAC address") "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' 
delimiters)" @staticmethod @@ -5577,11 +6304,13 @@ def convertToDate(fmt="%Y-%m-%d"): prints:: [datetime.date(1999, 12, 31)] """ - def cvt_fn(s,l,t): + + def cvt_fn(s, l, t): try: return datetime.strptime(t[0], fmt).date() except ValueError as ve: raise ParseException(s, l, str(ve)) + return cvt_fn @staticmethod @@ -5599,41 +6328,61 @@ def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): prints:: [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] """ - def cvt_fn(s,l,t): + + def cvt_fn(s, l, t): try: return datetime.strptime(t[0], fmt) except ValueError as ve: raise ParseException(s, l, str(ve)) + return cvt_fn - iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date") + iso8601_date = Regex( + r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?" + ).setName("ISO8601 date") "ISO8601 date (C{yyyy-mm-dd})" - iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") + iso8601_datetime = Regex( + r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T" + r" ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?" + ).setName("ISO8601 datetime") "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" - uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") + uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").setName("UUID") "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() + @staticmethod def stripHTMLTags(s, l, tokens): """ Parse action to remove HTML tags from web page HTML source Example:: - # strip HTML links from normal text + # strip HTML links from normal text text = '<td>More info at the
<a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>' td,td_end = makeHTMLTags("TD") table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - + print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' """ return pyparsing_common._html_stripper.transformString(tokens[0]) - _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') - + Optional( White(" \t") ) ) ).streamline().setName("commaItem") - comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") + _commasepitem = ( + Combine( + OneOrMore( + ~Literal(",") + + ~LineEnd() + + Word(printables, excludeChars=",") + + Optional(White(" \t")) + ) + ) + .streamline() + .setName("commaItem") + ) + comma_separated_list = delimitedList( + Optional(quotedString.copy() | _commasepitem, default="") + ).setName("comma separated list") """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) @@ -5645,22 +6394,28 @@ def stripHTMLTags(s, l, tokens): if __name__ == "__main__": - selectToken = CaselessLiteral("select") - fromToken = CaselessLiteral("from") + selectToken = CaselessLiteral("select") + fromToken = CaselessLiteral("from") - ident = Word(alphas, alphanums + "_$") + ident = Word(alphas, alphanums + "_$") - columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) columnNameList = Group(delimitedList(columnName)).setName("columns") - columnSpec = ('*' | columnNameList) + columnSpec = "*" | columnNameList - tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - tableNameList = Group(delimitedList(tableName)).setName("tables") - - simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") + tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + tableNameList = Group(delimitedList(tableName)).setName("tables") + + simpleSQL = ( + selectToken("command") + + columnSpec("columns") + + fromToken + + tableNameList("tables") + ) # demo runTests method, including embedded comments in test string - simpleSQL.runTests(""" + simpleSQL.runTests( + """ # '*' as column list and dotted table name select * from SYS.XYZZY @@ -5682,34 +6437,44 @@ def stripHTMLTags(s, l, tokens): # invalid column name - should fail Select ^^^ frox Sys.dual - """) + """ + ) - pyparsing_common.number.runTests(""" + pyparsing_common.number.runTests( + """ 100 -100 +100 3.14159 6.02e23 1e-12 - """) + """ + ) # any int or real number, returned as float - pyparsing_common.fnumber.runTests(""" + pyparsing_common.fnumber.runTests( + """ 100 -100 +100 3.14159 6.02e23 1e-12 - """) + """ + ) - pyparsing_common.hex_integer.runTests(""" + pyparsing_common.hex_integer.runTests( + """ 100 FF - """) + """ + ) import uuid + pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(""" + pyparsing_common.uuid.runTests( + """ 12345678-1234-5678-1234-567812345678 - """) + """ + ) diff --git a/doc/tutorial/text_analytics/data/languages/fetch_data.py b/doc/tutorial/text_analytics/data/languages/fetch_data.py index 2dd0f208ade86..86f2a7c04f3ed 100644 --- a/doc/tutorial/text_analytics/data/languages/fetch_data.py +++ b/doc/tutorial/text_analytics/data/languages/fetch_data.py @@ -1,34 +1,32 @@ - # simple python script to
collect text paragraphs from various languages on the # same topic namely the Wikipedia encyclopedia itself +import codecs import os from urllib.request import Request, build_opener import lxml.html -from lxml.etree import ElementTree import numpy as np - -import codecs +from lxml.etree import ElementTree pages = { - 'ar': 'http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7', # noqa: E501 - 'de': 'http://de.wikipedia.org/wiki/Wikipedia', - 'en': 'https://en.wikipedia.org/wiki/Wikipedia', - 'es': 'http://es.wikipedia.org/wiki/Wikipedia', - 'fr': 'http://fr.wikipedia.org/wiki/Wikip%C3%A9dia', - 'it': 'http://it.wikipedia.org/wiki/Wikipedia', - 'ja': 'http://ja.wikipedia.org/wiki/Wikipedia', - 'nl': 'http://nl.wikipedia.org/wiki/Wikipedia', - 'pl': 'http://pl.wikipedia.org/wiki/Wikipedia', - 'pt': 'http://pt.wikipedia.org/wiki/Wikip%C3%A9dia', - 'ru': 'http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F', # noqa: E501 -# u'zh': u'http://zh.wikipedia.org/wiki/Wikipedia', + "ar": "http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7", # noqa: E501 + "de": "http://de.wikipedia.org/wiki/Wikipedia", + "en": "https://en.wikipedia.org/wiki/Wikipedia", + "es": "http://es.wikipedia.org/wiki/Wikipedia", + "fr": "http://fr.wikipedia.org/wiki/Wikip%C3%A9dia", + "it": "http://it.wikipedia.org/wiki/Wikipedia", + "ja": "http://ja.wikipedia.org/wiki/Wikipedia", + "nl": "http://nl.wikipedia.org/wiki/Wikipedia", + "pl": "http://pl.wikipedia.org/wiki/Wikipedia", + "pt": "http://pt.wikipedia.org/wiki/Wikip%C3%A9dia", + "ru": "http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F", # noqa: E501 + # u'zh': u'http://zh.wikipedia.org/wiki/Wikipedia', } -html_folder = 'html' -text_folder = 'paragraphs' -short_text_folder = 'short_paragraphs' +html_folder = "html" +text_folder = "paragraphs" +short_text_folder = "short_paragraphs" n_words_per_short_text = 5 @@ -46,42 +44,41 @@ os.makedirs(short_text_lang_folder) opener = build_opener() - html_filename = os.path.join(html_folder, lang + '.html') + html_filename = os.path.join(html_folder, lang + ".html") if not os.path.exists(html_filename): print("Downloading %s" % page) request = Request(page) # change the User Agent to avoid being blocked by Wikipedia # downloading a couple of articles should not be considered abusive - request.add_header('User-Agent', 'OpenAnything/1.0') + request.add_header("User-Agent", "OpenAnything/1.0") html_content = opener.open(request).read() - with open(html_filename, 'wb') as f: + with open(html_filename, "wb") as f: f.write(html_content) # decode the payload explicitly as UTF-8 since lxml is confused for some # reason - with codecs.open(html_filename,'r','utf-8') as html_file: + with codecs.open(html_filename, "r", "utf-8") as html_file: html_content = html_file.read() tree = ElementTree(lxml.html.document_fromstring(html_content)) i = 0 j = 0 - for p in tree.findall('//p'): + for p in tree.findall("//p"): content = p.text_content() if len(content) < 100: # skip paragraphs that are too short - probably too noisy and not # representative of the actual language continue - text_filename = os.path.join(text_lang_folder, - '%s_%04d.txt' % (lang, i)) + text_filename = os.path.join(text_lang_folder, "%s_%04d.txt" % (lang, i)) print("Writing %s" % text_filename) - with open(text_filename, 'wb') as f: - f.write(content.encode('utf-8', 'ignore')) + with open(text_filename, "wb") as f: + f.write(content.encode("utf-8", 
"ignore")) i += 1 # split the paragraph into fake smaller paragraphs to make the # problem harder e.g. more similar to tweets - if lang in ('zh', 'ja'): - # FIXME: whitespace tokenizing does not work on chinese and japanese + if lang in ("zh", "ja"): + # FIXME: whitespace tokenizing does not work on chinese and japanese continue words = content.split() n_groups = len(words) / n_words_per_short_text @@ -92,12 +89,12 @@ for group in groups: small_content = " ".join(group) - short_text_filename = os.path.join(short_text_lang_folder, - '%s_%04d.txt' % (lang, j)) + short_text_filename = os.path.join( + short_text_lang_folder, "%s_%04d.txt" % (lang, j) + ) print("Writing %s" % short_text_filename) - with open(short_text_filename, 'wb') as f: - f.write(small_content.encode('utf-8', 'ignore')) + with open(short_text_filename, "wb") as f: + f.write(small_content.encode("utf-8", "ignore")) j += 1 if j >= 1000: break - diff --git a/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py b/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py index e591aca0f241b..e74dfc621ebda 100644 --- a/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py +++ b/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py @@ -5,11 +5,9 @@ from contextlib import closing from urllib.request import urlopen +URL = "http://www.cs.cornell.edu/people/pabo/movie-review-data/review_polarity.tar.gz" -URL = ("http://www.cs.cornell.edu/people/pabo/" - "movie-review-data/review_polarity.tar.gz") - -ARCHIVE_NAME = URL.rsplit('/', 1)[1] +ARCHIVE_NAME = URL.rsplit("/", 1)[1] DATA_FOLDER = "txt_sentoken" @@ -18,10 +16,10 @@ if not os.path.exists(ARCHIVE_NAME): print("Downloading dataset from %s (3 MB)" % URL) opener = urlopen(URL) - with open(ARCHIVE_NAME, 'wb') as archive: + with open(ARCHIVE_NAME, "wb") as archive: archive.write(opener.read()) print("Decompressing %s" % ARCHIVE_NAME) with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: - archive.extractall(path='.') + archive.extractall(path=".") os.remove(ARCHIVE_NAME) diff --git a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py index 438481120d126..e0b028f52d342 100644 --- a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py +++ b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py @@ -11,13 +11,12 @@ import sys -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.linear_model import Perceptron -from sklearn.pipeline import Pipeline +from sklearn import metrics from sklearn.datasets import load_files +from sklearn.feature_extraction.text import TfidfVectorizer # noqa +from sklearn.linear_model import Perceptron # noqa from sklearn.model_selection import train_test_split -from sklearn import metrics - +from sklearn.pipeline import Pipeline # noqa # The training data folder must be passed as first argument languages_data_folder = sys.argv[1] @@ -25,7 +24,8 @@ # Split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.5) + dataset.data, dataset.target, test_size=0.5 +) # TASK: Build a vectorizer that splits strings into sequence of 1 to 3 @@ -39,24 +39,27 @@ # TASK: Predict the outcome on the testing set in a variable named y_predicted # Print the classification report -print(metrics.classification_report(y_test, y_predicted, - target_names=dataset.target_names)) +print( + 
metrics.classification_report( + y_test, y_predicted, target_names=dataset.target_names # noqa + ) +) # Plot the confusion matrix -cm = metrics.confusion_matrix(y_test, y_predicted) +cm = metrics.confusion_matrix(y_test, y_predicted) # noqa print(cm) -#import matplotlib.pyplot as plt -#plt.matshow(cm, cmap=plt.cm.jet) -#plt.show() +# import matplotlib.pyplot as plt +# plt.matshow(cm, cmap=plt.cm.jet) +# plt.show() # Predict the result on some short new sentences: sentences = [ - 'This is a language detection test.', - 'Ceci est un test de d\xe9tection de la langue.', - 'Dies ist ein Test, um die Sprache zu erkennen.', + "This is a language detection test.", + "Ceci est un test de d\xe9tection de la langue.", + "Dies ist ein Test, um die Sprache zu erkennen.", ] -predicted = clf.predict(sentences) +predicted = clf.predict(sentences) # noqa for s, p in zip(sentences, predicted): print('The language of "%s" is "%s"' % (s, dataset.target_names[p])) diff --git a/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py b/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py index 23299f5f01b3d..afefcac4a31bb 100644 --- a/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py +++ b/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py @@ -12,14 +12,13 @@ # License: Simplified BSD import sys -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.svm import LinearSVC -from sklearn.pipeline import Pipeline -from sklearn.model_selection import GridSearchCV -from sklearn.datasets import load_files -from sklearn.model_selection import train_test_split -from sklearn import metrics +from sklearn import metrics +from sklearn.datasets import load_files +from sklearn.feature_extraction.text import TfidfVectorizer # noqa +from sklearn.model_selection import GridSearchCV, train_test_split # noqa +from sklearn.pipeline import Pipeline # noqa +from sklearn.svm import LinearSVC # noqa if __name__ == "__main__": # NOTE: we put the following in a 'if __name__ == "__main__"' protected @@ -35,7 +34,8 @@ # split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.25, random_state=None) + dataset.data, dataset.target, test_size=0.25, random_state=None + ) # TASK: Build a vectorizer / classifier pipeline that filters out tokens # that are too rare or too frequent @@ -51,11 +51,14 @@ # named y_predicted # Print the classification report - print(metrics.classification_report(y_test, y_predicted, - target_names=dataset.target_names)) + print( + metrics.classification_report( + y_test, y_predicted, target_names=dataset.target_names # noqa + ) + ) # Print and plot the confusion matrix - cm = metrics.confusion_matrix(y_test, y_predicted) + cm = metrics.confusion_matrix(y_test, y_predicted) # noqa print(cm) # import matplotlib.pyplot as plt diff --git a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py index 21cee0c80e00e..5ab6ae73afa10 100644 --- a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py +++ b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py @@ -11,13 +11,12 @@ import sys +from sklearn import metrics +from sklearn.datasets import load_files from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.linear_model import Perceptron -from sklearn.pipeline import Pipeline -from sklearn.datasets import load_files from 
sklearn.model_selection import train_test_split -from sklearn import metrics - +from sklearn.pipeline import Pipeline # The training data folder must be passed as first argument languages_data_folder = sys.argv[1] @@ -25,20 +24,22 @@ # Split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.5) + dataset.data, dataset.target, test_size=0.5 +) # TASK: Build a vectorizer that splits strings into sequence of 1 to 3 # characters instead of word tokens -vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer='char', - use_idf=False) +vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer="char", use_idf=False) # TASK: Build a vectorizer / classifier pipeline using the previous analyzer # the pipeline instance should stored in a variable named clf -clf = Pipeline([ - ('vec', vectorizer), - ('clf', Perceptron()), -]) +clf = Pipeline( + [ + ("vec", vectorizer), + ("clf", Perceptron()), + ] +) # TASK: Fit the pipeline on the training set clf.fit(docs_train, y_train) @@ -47,22 +48,25 @@ y_predicted = clf.predict(docs_test) # Print the classification report -print(metrics.classification_report(y_test, y_predicted, - target_names=dataset.target_names)) +print( + metrics.classification_report( + y_test, y_predicted, target_names=dataset.target_names + ) +) # Plot the confusion matrix cm = metrics.confusion_matrix(y_test, y_predicted) print(cm) -#import matlotlib.pyplot as plt -#plt.matshow(cm, cmap=plt.cm.jet) -#plt.show() +# import matlotlib.pyplot as plt +# plt.matshow(cm, cmap=plt.cm.jet) +# plt.show() # Predict the result on some short new sentences: sentences = [ - 'This is a language detection test.', - 'Ceci est un test de d\xe9tection de la langue.', - 'Dies ist ein Test, um die Sprache zu erkennen.', + "This is a language detection test.", + "Ceci est un test de d\xe9tection de la langue.", + "Dies ist ein Test, um die Sprache zu erkennen.", ] predicted = clf.predict(sentences) diff --git a/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py b/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py index 434bece341975..013753c57b6b7 100644 --- a/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py +++ b/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py @@ -12,14 +12,13 @@ # License: Simplified BSD import sys + +from sklearn import metrics +from sklearn.datasets import load_files from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.svm import LinearSVC +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import Pipeline -from sklearn.model_selection import GridSearchCV -from sklearn.datasets import load_files -from sklearn.model_selection import train_test_split -from sklearn import metrics - +from sklearn.svm import LinearSVC if __name__ == "__main__": # NOTE: we put the following in a 'if __name__ == "__main__"' protected @@ -35,40 +34,51 @@ # split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.25, random_state=None) + dataset.data, dataset.target, test_size=0.25, random_state=None + ) # TASK: Build a vectorizer / classifier pipeline that filters out tokens # that are too rare or too frequent - pipeline = Pipeline([ - ('vect', TfidfVectorizer(min_df=3, max_df=0.95)), - ('clf', LinearSVC(C=1000)), - ]) + pipeline = Pipeline( + [ + ("vect", TfidfVectorizer(min_df=3, max_df=0.95)), + ("clf", LinearSVC(C=1000)), 
+ ] + ) # TASK: Build a grid search to find out whether unigrams or bigrams are # more useful. # Fit the pipeline on the training set using grid search for the parameters parameters = { - 'vect__ngram_range': [(1, 1), (1, 2)], + "vect__ngram_range": [(1, 1), (1, 2)], } grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1) grid_search.fit(docs_train, y_train) # TASK: print the mean and std for each candidate along with the parameter # settings for all the candidates explored by grid search. - n_candidates = len(grid_search.cv_results_['params']) + n_candidates = len(grid_search.cv_results_["params"]) for i in range(n_candidates): - print(i, 'params - %s; mean - %0.2f; std - %0.2f' - % (grid_search.cv_results_['params'][i], - grid_search.cv_results_['mean_test_score'][i], - grid_search.cv_results_['std_test_score'][i])) + print( + i, + "params - %s; mean - %0.2f; std - %0.2f" + % ( + grid_search.cv_results_["params"][i], + grid_search.cv_results_["mean_test_score"][i], + grid_search.cv_results_["std_test_score"][i], + ), + ) # TASK: Predict the outcome on the testing set and store it in a variable # named y_predicted y_predicted = grid_search.predict(docs_test) # Print the classification report - print(metrics.classification_report(y_test, y_predicted, - target_names=dataset.target_names)) + print( + metrics.classification_report( + y_test, y_predicted, target_names=dataset.target_names + ) + ) # Print and plot the confusion matrix cm = metrics.confusion_matrix(y_test, y_predicted) diff --git a/examples/applications/plot_cyclical_feature_engineering.py b/examples/applications/plot_cyclical_feature_engineering.py index 10ab666ab277e..9671f791153fc 100644 --- a/examples/applications/plot_cyclical_feature_engineering.py +++ b/examples/applications/plot_cyclical_feature_engineering.py @@ -35,7 +35,6 @@ # demand around the middle of the days: import matplotlib.pyplot as plt - fig, ax = plt.subplots(figsize=(12, 4)) average_week_demand = df.groupby(["weekday", "hour"]).mean()["count"] average_week_demand.plot(ax=ax) @@ -161,6 +160,10 @@ # %% X.iloc[train_4] +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import HistGradientBoostingRegressor +from sklearn.model_selection import cross_validate + # %% # All is well. We are now ready to do some predictive modeling! # @@ -183,10 +186,6 @@ # we only try the default hyper-parameters for this model: from sklearn.pipeline import make_pipeline from sklearn.preprocessing import OrdinalEncoder -from sklearn.compose import ColumnTransformer -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.model_selection import cross_validate - categorical_columns = [ "weather", @@ -239,6 +238,10 @@ def evaluate(model, X, y, cv): evaluate(gbrt_pipeline, X, y, cv=ts_cv) +import numpy as np + +from sklearn.linear_model import RidgeCV + # %% # This model has an average error around 4 to 5% of the maximum demand. This is # quite good for a first trial without any hyper-parameter tuning! 
We just had @@ -257,11 +260,7 @@ def evaluate(model, X, y, cv): # For consistency, we scale the numerical features to the same 0-1 range using # class:`sklearn.preprocessing.MinMaxScaler`, although in this case it does not # impact the results much because they are already on comparable scales: -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import MinMaxScaler -from sklearn.linear_model import RidgeCV -import numpy as np - +from sklearn.preprocessing import MinMaxScaler, OneHotEncoder one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse=False) alphas = np.logspace(-6, 6, 25) @@ -602,6 +601,8 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # "workingday" and features derived from "hours". This issue will be addressed # in the following section. +from sklearn.pipeline import FeatureUnion + # %% # Modeling pairwise interactions with splines and polynomial features # ------------------------------------------------------------------- @@ -615,8 +616,6 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # grained spline encoded hours to model the "workingday"/"hours" interaction # explicitly without introducing too many new variables: from sklearn.preprocessing import PolynomialFeatures -from sklearn.pipeline import FeatureUnion - hour_workday_interaction = make_pipeline( ColumnTransformer( @@ -663,7 +662,6 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # polynomial kernel expansion. Let us try the latter: from sklearn.kernel_approximation import Nystroem - cyclic_spline_poly_pipeline = make_pipeline( cyclic_spline_transformer, Nystroem(kernel="poly", degree=2, n_components=300, random_state=0), diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py index 84702034152f5..93af1a4f5f89f 100644 --- a/examples/applications/plot_digits_denoising.py +++ b/examples/applications/plot_digits_denoising.py @@ -32,9 +32,10 @@ # :func:`~sklearn.datasets.fetch_openml` to get this dataset. In addition, we # normalize the dataset such that all pixel values are in the range (0, 1). 
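The hunks in this file, like most of this patch, only regroup imports; no statement is otherwise changed. A rough sketch of the ordering these diffs follow, assuming isort's default section detection (which appears to treat sklearn as the local first-party package when run inside this repository): standard-library modules first, then third-party packages, then sklearn itself, each group alphabetized and separated by a blank line.

    # hypothetical module, before sorting
    from sklearn.pipeline import make_pipeline
    import numpy as np
    import time

    # after isort with profile = "black"
    import time

    import numpy as np

    from sklearn.pipeline import make_pipeline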
import numpy as np + from sklearn.datasets import fetch_openml -from sklearn.preprocessing import MinMaxScaler from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MinMaxScaler X, y = fetch_openml(data_id=41082, as_frame=False, return_X_y=True, parser="pandas") X = MinMaxScaler().fit_transform(X) diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py index 069f0f5aad202..34f40da368d03 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -13,19 +13,17 @@ """ # %% from time import time + import matplotlib.pyplot as plt -from sklearn.model_selection import train_test_split -from sklearn.model_selection import RandomizedSearchCV from sklearn.datasets import fetch_lfw_people -from sklearn.metrics import classification_report -from sklearn.metrics import ConfusionMatrixDisplay -from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA +from sklearn.metrics import ConfusionMatrixDisplay, classification_report +from sklearn.model_selection import RandomizedSearchCV, train_test_split +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC from sklearn.utils.fixes import loguniform - # %% # Download the data, if not already on disk and load it as numpy arrays diff --git a/examples/applications/plot_model_complexity_influence.py b/examples/applications/plot_model_complexity_influence.py index d05f4ab497ada..d96e88657e4c8 100644 --- a/examples/applications/plot_model_complexity_influence.py +++ b/examples/applications/plot_model_complexity_influence.py @@ -38,16 +38,16 @@ # License: BSD 3 clause import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np from sklearn import datasets -from sklearn.model_selection import train_test_split -from sklearn.metrics import mean_squared_error -from sklearn.svm import NuSVR from sklearn.ensemble import GradientBoostingRegressor from sklearn.linear_model import SGDClassifier -from sklearn.metrics import hamming_loss +from sklearn.metrics import hamming_loss, mean_squared_error +from sklearn.model_selection import train_test_split +from sklearn.svm import NuSVR # Initialize random generator np.random.seed(0) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index a8e4f9b72a3b0..8468c1440800a 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -18,25 +18,23 @@ # @FedericoV # License: BSD 3 clause -from glob import glob import itertools import os.path import re +import sys import tarfile import time -import sys +from glob import glob +from html.parser import HTMLParser +from urllib.request import urlretrieve -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib import rcParams -from html.parser import HTMLParser -from urllib.request import urlretrieve from sklearn.datasets import get_data_home from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.linear_model import SGDClassifier -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import Perceptron +from sklearn.linear_model import PassiveAggressiveClassifier, Perceptron, SGDClassifier from sklearn.naive_bayes import MultinomialNB diff --git a/examples/applications/plot_outlier_detection_wine.py 
b/examples/applications/plot_outlier_detection_wine.py index 45e4c64d9fcc4..c4adfa222a5dd 100644 --- a/examples/applications/plot_outlier_detection_wine.py +++ b/examples/applications/plot_outlier_detection_wine.py @@ -37,12 +37,13 @@ # Author: Virgile Fritsch # License: BSD 3 clause +import matplotlib.font_manager +import matplotlib.pyplot as plt import numpy as np + from sklearn.covariance import EllipticEnvelope -from sklearn.svm import OneClassSVM -import matplotlib.pyplot as plt -import matplotlib.font_manager from sklearn.datasets import load_wine +from sklearn.svm import OneClassSVM # Define "classifiers" to be used classifiers = { diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index 9b99bcbfdfaf1..8fce81fb9fb4e 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -16,19 +16,18 @@ # Authors: Eustache Diemert # License: BSD 3 clause +import gc +import time from collections import defaultdict -import time -import gc -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split from sklearn.datasets import make_regression from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import Ridge -from sklearn.linear_model import SGDRegressor +from sklearn.linear_model import Ridge, SGDRegressor +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVR from sklearn.utils import shuffle diff --git a/examples/applications/plot_species_distribution_modeling.py b/examples/applications/plot_species_distribution_modeling.py index 3f932c3f6562c..cbf03e04efd23 100644 --- a/examples/applications/plot_species_distribution_modeling.py +++ b/examples/applications/plot_species_distribution_modeling.py @@ -43,12 +43,12 @@ from time import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.utils import Bunch +from sklearn import metrics, svm from sklearn.datasets import fetch_species_distributions -from sklearn import svm, metrics +from sklearn.utils import Bunch # if basemap is available, we'll use it. # otherwise, we'll improvise later... diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index 39708be5ef3e0..f295a9d123572 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -23,6 +23,7 @@ # alphavantage.co . 
import sys + import numpy as np import pandas as pd diff --git a/examples/applications/plot_tomography_l1_reconstruction.py b/examples/applications/plot_tomography_l1_reconstruction.py index 9ac351c12206c..d851613402571 100644 --- a/examples/applications/plot_tomography_l1_reconstruction.py +++ b/examples/applications/plot_tomography_l1_reconstruction.py @@ -39,12 +39,11 @@ class :class:`~sklearn.linear_model.Lasso`, that uses the coordinate descent # Author: Emmanuelle Gouillart # License: BSD 3 clause -import numpy as np -from scipy import sparse -from scipy import ndimage -from sklearn.linear_model import Lasso -from sklearn.linear_model import Ridge import matplotlib.pyplot as plt +import numpy as np +from scipy import ndimage, sparse + +from sklearn.linear_model import Lasso, Ridge def _weights(x, dx=1, orig=0): diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index 38945241ab68b..0385fd7c89333 100644 --- a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -27,11 +27,12 @@ # License: BSD 3 clause from time import time + import matplotlib.pyplot as plt -from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer -from sklearn.decomposition import NMF, MiniBatchNMF, LatentDirichletAllocation from sklearn.datasets import fetch_20newsgroups +from sklearn.decomposition import NMF, LatentDirichletAllocation, MiniBatchNMF +from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer n_samples = 2000 n_features = 1000 diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py index c8019fa72ae91..fbbba0dd2344f 100644 --- a/examples/applications/svm_gui.py +++ b/examples/applications/svm_gui.py @@ -30,13 +30,14 @@ from matplotlib.backends.backend_tkagg import ( NavigationToolbar2TkAgg as NavigationToolbar2Tk, ) -from matplotlib.figure import Figure -from matplotlib.contour import ContourSet import sys -import numpy as np import tkinter as Tk +import numpy as np +from matplotlib.contour import ContourSet +from matplotlib.figure import Figure + from sklearn import svm from sklearn.datasets import dump_svmlight_file diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index fcc337b0a4e00..0be1661d7ed5c 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -33,19 +33,17 @@ # Author: Olivier Grisel # License: BSD 3 clause -from bz2 import BZ2File import os +from bz2 import BZ2File from datetime import datetime from pprint import pprint from time import time +from urllib.request import urlopen import numpy as np - from scipy import sparse from sklearn.decomposition import randomized_svd -from urllib.request import urlopen - # %% # Download data, if not already on disk diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py index 615a3d1495eb8..944ebe270d1a0 100644 --- a/examples/bicluster/plot_bicluster_newsgroups.py +++ b/examples/bicluster/plot_bicluster_newsgroups.py @@ -23,14 +23,13 @@ """ -from collections import defaultdict import operator +from collections import defaultdict from time import time import numpy as np -from sklearn.cluster import SpectralCoclustering -from sklearn.cluster import MiniBatchKMeans +from sklearn.cluster import MiniBatchKMeans, 
SpectralCoclustering from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.cluster import v_measure_score diff --git a/examples/bicluster/plot_spectral_biclustering.py b/examples/bicluster/plot_spectral_biclustering.py index 3a0af07815c02..609697bc44d37 100644 --- a/examples/bicluster/plot_spectral_biclustering.py +++ b/examples/bicluster/plot_spectral_biclustering.py @@ -22,11 +22,10 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.datasets import make_checkerboard from sklearn.cluster import SpectralBiclustering +from sklearn.datasets import make_checkerboard from sklearn.metrics import consensus_score - n_clusters = (4, 3) data, rows, columns = make_checkerboard( shape=(300, 300), n_clusters=n_clusters, noise=10, shuffle=False, random_state=0 diff --git a/examples/bicluster/plot_spectral_coclustering.py b/examples/bicluster/plot_spectral_coclustering.py index 0df275e83e3bd..92b10d93956e7 100644 --- a/examples/bicluster/plot_spectral_coclustering.py +++ b/examples/bicluster/plot_spectral_coclustering.py @@ -21,8 +21,8 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.datasets import make_biclusters from sklearn.cluster import SpectralCoclustering +from sklearn.datasets import make_biclusters from sklearn.metrics import consensus_score data, rows, columns = make_biclusters( diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index 75d1ea15b8fbd..96deffe4fca6a 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -88,11 +88,12 @@ clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test) print("With sigmoid calibration: %1.3f" % clf_sigmoid_score) +import matplotlib.pyplot as plt + # %% # Plot data and the predicted probabilities # ----------------------------------------- from matplotlib import cm -import matplotlib.pyplot as plt plt.figure() y_unique = np.unique(y) diff --git a/examples/calibration/plot_calibration_curve.py b/examples/calibration/plot_calibration_curve.py index bd36d7e4a654b..978d3154183fa 100644 --- a/examples/calibration/plot_calibration_curve.py +++ b/examples/calibration/plot_calibration_curve.py @@ -140,11 +140,11 @@ import pandas as pd from sklearn.metrics import ( - precision_score, - recall_score, - f1_score, brier_score_loss, + f1_score, log_loss, + precision_score, + recall_score, roc_auc_score, ) diff --git a/examples/calibration/plot_calibration_multiclass.py b/examples/calibration/plot_calibration_multiclass.py index 24962a786ea03..fc6349f3dea5f 100644 --- a/examples/calibration/plot_calibration_multiclass.py +++ b/examples/calibration/plot_calibration_multiclass.py @@ -31,6 +31,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3). # License: BSD Style. 
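The same rewrite can be reproduced outside pre-commit; a minimal sketch using the isort Python API (isort >= 5; the unsorted snippet here is hypothetical):

    import isort

    messy = "from sklearn import metrics\nimport numpy as np\nimport operator\n"

    # isort.code() returns the sorted source; profile="black" mirrors the
    # [tool.isort] setting added to pyproject.toml in this series.
    print(isort.code(messy, profile="black"))

    # isort.check_code() is the non-mutating variant used for CI-style checks;
    # it returns True only when the imports are already sorted.
    assert not isort.check_code(messy, profile="black")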
import numpy as np + from sklearn.datasets import make_blobs np.random.seed(0) diff --git a/examples/classification/plot_classification_probability.py b/examples/classification/plot_classification_probability.py index 87c3f51db5eb2..ec5887b63914d 100644 --- a/examples/classification/plot_classification_probability.py +++ b/examples/classification/plot_classification_probability.py @@ -23,12 +23,12 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.metrics import accuracy_score -from sklearn.linear_model import LogisticRegression -from sklearn.svm import SVC +from sklearn import datasets from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import RBF -from sklearn import datasets +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score +from sklearn.svm import SVC iris = datasets.load_iris() X = iris.data[:, 0:2] # we only take the first two features for visualization diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py index e4c52d9e2564a..2cb17726131b5 100644 --- a/examples/classification/plot_classifier_comparison.py +++ b/examples/classification/plot_classifier_comparison.py @@ -25,22 +25,23 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import ListedColormap + +from sklearn.datasets import make_circles, make_classification, make_moons +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier +from sklearn.gaussian_process import GaussianProcessClassifier +from sklearn.gaussian_process.kernels import RBF +from sklearn.inspection import DecisionBoundaryDisplay from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from sklearn.datasets import make_moons, make_circles, make_classification -from sklearn.neural_network import MLPClassifier +from sklearn.naive_bayes import GaussianNB from sklearn.neighbors import KNeighborsClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC -from sklearn.gaussian_process import GaussianProcessClassifier -from sklearn.gaussian_process.kernels import RBF from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier -from sklearn.naive_bayes import GaussianNB -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis -from sklearn.inspection import DecisionBoundaryDisplay names = [ "Nearest Neighbors", diff --git a/examples/classification/plot_digits_classification.py b/examples/classification/plot_digits_classification.py index 385bc865cd48b..964f93e3b1e62 100644 --- a/examples/classification/plot_digits_classification.py +++ b/examples/classification/plot_digits_classification.py @@ -15,7 +15,7 @@ import matplotlib.pyplot as plt # Import datasets, classifiers and performance metrics -from sklearn import datasets, svm, metrics +from sklearn import datasets, metrics, svm from sklearn.model_selection import train_test_split ############################################################################### diff --git a/examples/classification/plot_lda.py b/examples/classification/plot_lda.py index 47487fc1f2caf..aec7565e39174 100644 --- a/examples/classification/plot_lda.py +++ 
b/examples/classification/plot_lda.py @@ -8,13 +8,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from sklearn.covariance import OAS from sklearn.datasets import make_blobs from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.covariance import OAS - n_train = 20 # samples for training n_test = 200 # samples for testing diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py index 785a8627c8ca7..4093edea3400a 100644 --- a/examples/classification/plot_lda_qda.py +++ b/examples/classification/plot_lda_qda.py @@ -15,8 +15,8 @@ class has its own standard deviation with QDA. # Colormap # -------- -import matplotlib.pyplot as plt import matplotlib as mpl +import matplotlib.pyplot as plt from matplotlib import colors cmap = colors.LinearSegmentedColormap( @@ -172,8 +172,10 @@ def plot_qda_cov(qda, splot): fontsize=15, ) -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +from sklearn.discriminant_analysis import ( + LinearDiscriminantAnalysis, + QuadraticDiscriminantAnalysis, +) for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): # Linear Discriminant Analysis diff --git a/examples/cluster/plot_adjusted_for_chance_measures.py b/examples/cluster/plot_adjusted_for_chance_measures.py index 7985eb70e6a0b..19f5efa0cc693 100644 --- a/examples/cluster/plot_adjusted_for_chance_measures.py +++ b/examples/cluster/plot_adjusted_for_chance_measures.py @@ -24,9 +24,11 @@ # Author: Olivier Grisel # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt from time import time + +import matplotlib.pyplot as plt +import numpy as np + from sklearn import metrics diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py index 3007b0e6539a2..335dfafb39b28 100644 --- a/examples/cluster/plot_affinity_propagation.py +++ b/examples/cluster/plot_affinity_propagation.py @@ -9,8 +9,8 @@ """ -from sklearn.cluster import AffinityPropagation from sklearn import metrics +from sklearn.cluster import AffinityPropagation from sklearn.datasets import make_blobs # %% @@ -44,11 +44,12 @@ % metrics.silhouette_score(X, labels, metric="sqeuclidean") ) +from itertools import cycle + # %% # Plot result # ----------- import matplotlib.pyplot as plt -from itertools import cycle plt.close("all") plt.figure(1) diff --git a/examples/cluster/plot_agglomerative_clustering.py b/examples/cluster/plot_agglomerative_clustering.py index 9d590f572f121..1c844bd0a7c62 100644 --- a/examples/cluster/plot_agglomerative_clustering.py +++ b/examples/cluster/plot_agglomerative_clustering.py @@ -28,6 +28,7 @@ # License: BSD 3 clause import time + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/cluster/plot_agglomerative_dendrogram.py b/examples/cluster/plot_agglomerative_dendrogram.py index 2de5030d68f6d..20c22f4f0bb39 100644 --- a/examples/cluster/plot_agglomerative_dendrogram.py +++ b/examples/cluster/plot_agglomerative_dendrogram.py @@ -10,11 +10,11 @@ """ import numpy as np - from matplotlib import pyplot as plt from scipy.cluster.hierarchy import dendrogram -from sklearn.datasets import load_iris + from sklearn.cluster import AgglomerativeClustering +from sklearn.datasets import load_iris def plot_dendrogram(model, **kwargs): diff --git a/examples/cluster/plot_birch_vs_minibatchkmeans.py b/examples/cluster/plot_birch_vs_minibatchkmeans.py index 
3d4185dc9368a..55502f446c146 100644 --- a/examples/cluster/plot_birch_vs_minibatchkmeans.py +++ b/examples/cluster/plot_birch_vs_minibatchkmeans.py @@ -25,17 +25,17 @@ # Alexandre Gramfort # License: BSD 3 clause -from joblib import cpu_count from itertools import cycle from time import time -import numpy as np -import matplotlib.pyplot as plt + import matplotlib.colors as colors +import matplotlib.pyplot as plt +import numpy as np +from joblib import cpu_count from sklearn.cluster import Birch, MiniBatchKMeans from sklearn.datasets import make_blobs - # Generate centers for the blobs so that it forms a 10 X 10 grid. xx = np.linspace(-22, 22, 10) yy = np.linspace(-22, 22, 10) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index 818f1cc612c2f..095c533cb2aec 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -13,9 +13,8 @@ """ import matplotlib.pyplot as plt -from sklearn.datasets import make_blobs from sklearn.cluster import BisectingKMeans, KMeans - +from sklearn.datasets import make_blobs print(__doc__) diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py index 8b52759c79018..92d5bd6bcdba9 100644 --- a/examples/cluster/plot_cluster_comparison.py +++ b/examples/cluster/plot_cluster_comparison.py @@ -26,14 +26,14 @@ import time import warnings +from itertools import cycle, islice -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import cluster, datasets, mixture from sklearn.neighbors import kneighbors_graph from sklearn.preprocessing import StandardScaler -from itertools import cycle, islice np.random.seed(0) diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py index e089e7bdd609c..91d3a7317398d 100644 --- a/examples/cluster/plot_cluster_iris.py +++ b/examples/cluster/plot_cluster_iris.py @@ -19,15 +19,15 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt # Though the following import is not directly being used, it is required # for 3D projection to work with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +import numpy as np -from sklearn.cluster import KMeans from sklearn import datasets +from sklearn.cluster import KMeans np.random.seed(5) diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py index 229ca182c1e65..aadcfe9faac73 100644 --- a/examples/cluster/plot_coin_segmentation.py +++ b/examples/cluster/plot_coin_segmentation.py @@ -27,15 +27,14 @@ import time +import matplotlib.pyplot as plt import numpy as np from scipy.ndimage import gaussian_filter -import matplotlib.pyplot as plt from skimage.data import coins from skimage.transform import rescale -from sklearn.feature_extraction import image from sklearn.cluster import spectral_clustering - +from sklearn.feature_extraction import image # load the coins as a numpy array orig_coins = coins() diff --git a/examples/cluster/plot_color_quantization.py b/examples/cluster/plot_color_quantization.py index 6fc6cdd4a449f..c03a61bac6a80 100644 --- a/examples/cluster/plot_color_quantization.py +++ b/examples/cluster/plot_color_quantization.py @@ -26,13 +26,15 @@ # # License: BSD 3 clause -import numpy as np +from time import time + import matplotlib.pyplot as plt +import numpy as np + from sklearn.cluster import KMeans -from sklearn.metrics import pairwise_distances_argmin from sklearn.datasets 
import load_sample_image +from sklearn.metrics import pairwise_distances_argmin from sklearn.utils import shuffle -from time import time n_colors = 64 diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py index 620ee6bdcb86e..9d7007e7b68a3 100644 --- a/examples/cluster/plot_dbscan.py +++ b/examples/cluster/plot_dbscan.py @@ -10,12 +10,11 @@ import numpy as np -from sklearn.cluster import DBSCAN from sklearn import metrics +from sklearn.cluster import DBSCAN from sklearn.datasets import make_blobs from sklearn.preprocessing import StandardScaler - # %% # Generate sample data # -------------------- diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py index 18288da252024..33e91bba67c84 100644 --- a/examples/cluster/plot_digits_agglomeration.py +++ b/examples/cluster/plot_digits_agglomeration.py @@ -13,10 +13,10 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import datasets, cluster +from sklearn import cluster, datasets from sklearn.feature_extraction.image import grid_to_graph digits = datasets.load_digits() diff --git a/examples/cluster/plot_digits_linkage.py b/examples/cluster/plot_digits_linkage.py index 730f85c543356..ae67bd5d8e0f4 100644 --- a/examples/cluster/plot_digits_linkage.py +++ b/examples/cluster/plot_digits_linkage.py @@ -35,7 +35,7 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn import manifold, datasets +from sklearn import datasets, manifold digits = datasets.load_digits() X, y = digits.data, digits.target diff --git a/examples/cluster/plot_face_compress.py b/examples/cluster/plot_face_compress.py index 700d862ec6001..77263d84cab5f 100644 --- a/examples/cluster/plot_face_compress.py +++ b/examples/cluster/plot_face_compress.py @@ -14,13 +14,12 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause +import matplotlib.pyplot as plt import numpy as np import scipy as sp -import matplotlib.pyplot as plt from sklearn import cluster - try: # SciPy >= 0.16 have face in misc from scipy.misc import face diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py index e2273326b9a12..f28ed194d2426 100644 --- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py +++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py @@ -21,18 +21,17 @@ import shutil import tempfile -import numpy as np import matplotlib.pyplot as plt +import numpy as np from scipy import linalg, ndimage -from joblib import Memory -from sklearn.feature_extraction.image import grid_to_graph +from joblib import Memory from sklearn import feature_selection from sklearn.cluster import FeatureAgglomeration +from sklearn.feature_extraction.image import grid_to_graph from sklearn.linear_model import BayesianRidge +from sklearn.model_selection import GridSearchCV, KFold from sklearn.pipeline import Pipeline -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import KFold # %% # Set parameters diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py index e395571a1caad..b6464459160e3 100644 --- a/examples/cluster/plot_inductive_clustering.py +++ b/examples/cluster/plot_inductive_clustering.py @@ -24,6 +24,7 @@ # Christos Aridas import matplotlib.pyplot as plt + from sklearn.base import 
BaseEstimator, clone from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import make_blobs @@ -32,7 +33,6 @@ from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import check_is_fitted - N_SAMPLES = 5000 RANDOM_STATE = 42 diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index 94f8ff6c58f52..d71f884839c1d 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ b/examples/cluster/plot_kmeans_assumptions.py @@ -14,8 +14,8 @@ # Author: Phil Roth # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.cluster import KMeans from sklearn.datasets import make_blobs diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py index fc79c867a8589..a048a8c1ee11c 100644 --- a/examples/cluster/plot_kmeans_digits.py +++ b/examples/cluster/plot_kmeans_digits.py @@ -34,6 +34,7 @@ # to group images such that the handwritten digits on the image are the same. import numpy as np + from sklearn.datasets import load_digits data, labels = load_digits(return_X_y=True) @@ -53,6 +54,7 @@ # * train and time the pipeline fitting; # * measure the performance of the clustering obtained via different metrics. from time import time + from sklearn import metrics from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler diff --git a/examples/cluster/plot_kmeans_plusplus.py b/examples/cluster/plot_kmeans_plusplus.py index eea2c2ec85093..66206ce7d7038 100644 --- a/examples/cluster/plot_kmeans_plusplus.py +++ b/examples/cluster/plot_kmeans_plusplus.py @@ -10,9 +10,10 @@ """ +import matplotlib.pyplot as plt + from sklearn.cluster import kmeans_plusplus from sklearn.datasets import make_blobs -import matplotlib.pyplot as plt # Generate sample data n_samples = 4000 diff --git a/examples/cluster/plot_kmeans_silhouette_analysis.py b/examples/cluster/plot_kmeans_silhouette_analysis.py index 8f4e241100e24..a999e83fcac5d 100644 --- a/examples/cluster/plot_kmeans_silhouette_analysis.py +++ b/examples/cluster/plot_kmeans_silhouette_analysis.py @@ -31,14 +31,14 @@ """ -from sklearn.datasets import make_blobs -from sklearn.cluster import KMeans -from sklearn.metrics import silhouette_samples, silhouette_score - -import matplotlib.pyplot as plt import matplotlib.cm as cm +import matplotlib.pyplot as plt import numpy as np +from sklearn.cluster import KMeans +from sklearn.datasets import make_blobs +from sklearn.metrics import silhouette_samples, silhouette_score + # Generating the sample data from make_blobs # This particular setting has one distinct cluster and 3 clusters placed close # together. 
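Hunks such as the plot_lda_qda.py one above also fold over-long imports into a parenthesized, trailing-comma block. That is the black-compatible wrapping the profile selects (88-character lines, multi-line output mode 3); a sketch of the expected behaviour, not verified against this exact isort release:

    import isort

    long_import = (
        "from sklearn.discriminant_analysis import "
        "LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis\n"
    )
    print(isort.code(long_import, profile="black"))
    # from sklearn.discriminant_analysis import (
    #     LinearDiscriminantAnalysis,
    #     QuadraticDiscriminantAnalysis,
    # )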
diff --git a/examples/cluster/plot_kmeans_stability_low_dim_dense.py b/examples/cluster/plot_kmeans_stability_low_dim_dense.py index a375c1cd103d7..7d8b903ab9623 100644 --- a/examples/cluster/plot_kmeans_stability_low_dim_dense.py +++ b/examples/cluster/plot_kmeans_stability_low_dim_dense.py @@ -26,14 +26,12 @@ # Author: Olivier Grisel # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt import matplotlib.cm as cm +import matplotlib.pyplot as plt +import numpy as np -from sklearn.utils import shuffle -from sklearn.utils import check_random_state -from sklearn.cluster import MiniBatchKMeans -from sklearn.cluster import KMeans +from sklearn.cluster import KMeans, MiniBatchKMeans +from sklearn.utils import check_random_state, shuffle random_state = np.random.RandomState(0) diff --git a/examples/cluster/plot_linkage_comparison.py b/examples/cluster/plot_linkage_comparison.py index af4c3cd2894af..dc009d0110f7c 100644 --- a/examples/cluster/plot_linkage_comparison.py +++ b/examples/cluster/plot_linkage_comparison.py @@ -25,13 +25,13 @@ import time import warnings +from itertools import cycle, islice -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import cluster, datasets from sklearn.preprocessing import StandardScaler -from itertools import cycle, islice np.random.seed(0) diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py index 6a6827e5aa49d..0afa0bde35165 100644 --- a/examples/cluster/plot_mean_shift.py +++ b/examples/cluster/plot_mean_shift.py @@ -12,6 +12,7 @@ """ import numpy as np + from sklearn.cluster import MeanShift, estimate_bandwidth from sklearn.datasets import make_blobs @@ -38,11 +39,12 @@ print("number of estimated clusters : %d" % n_clusters_) +from itertools import cycle + # %% # Plot result # ----------- import matplotlib.pyplot as plt -from itertools import cycle plt.figure(1) plt.clf() diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py index de0a185949972..cd279781a4b03 100644 --- a/examples/cluster/plot_mini_batch_kmeans.py +++ b/examples/cluster/plot_mini_batch_kmeans.py @@ -21,6 +21,7 @@ # We start by generating the blobs of data to be clustered. import numpy as np + from sklearn.datasets import make_blobs np.random.seed(0) @@ -35,6 +36,7 @@ # ------------------------------ import time + from sklearn.cluster import KMeans k_means = KMeans(init="k-means++", n_clusters=3, n_init=10) diff --git a/examples/cluster/plot_optics.py b/examples/cluster/plot_optics.py index 5956a2d47afa5..c15de1e5a989d 100644 --- a/examples/cluster/plot_optics.py +++ b/examples/cluster/plot_optics.py @@ -20,11 +20,12 @@ # Adrin Jalali # License: BSD 3 clause -from sklearn.cluster import OPTICS, cluster_optics_dbscan import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt import numpy as np +from sklearn.cluster import OPTICS, cluster_optics_dbscan + # Generate sample data np.random.seed(0) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index 0880cdb893839..cde504c0d82af 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -75,11 +75,12 @@ # that is close to a Voronoi partition graph.data = np.exp(-graph.data / graph.data.std()) +import matplotlib.pyplot as plt + # %% # Here we perform spectral clustering using the arpack solver since amg is # numerically unstable on this example. We then plot the results. 
 from sklearn.cluster import spectral_clustering
-import matplotlib.pyplot as plt
 
 labels = spectral_clustering(graph, n_clusters=4, eigen_solver="arpack")
 label_im = np.full(mask.shape, -1.0)
diff --git a/examples/cluster/plot_ward_structured_vs_unstructured.py b/examples/cluster/plot_ward_structured_vs_unstructured.py
index 430d00a8b3730..42a628241ba2e 100644
--- a/examples/cluster/plot_ward_structured_vs_unstructured.py
+++ b/examples/cluster/plot_ward_structured_vs_unstructured.py
@@ -27,13 +27,14 @@
 
 import time as time
 
-# The following import is required
-# for 3D projection to work with matplotlib < 3.2
-
 import mpl_toolkits.mplot3d  # noqa: F401
-
 import numpy as np
+from sklearn.datasets import make_swiss_roll
+
+# The following import is required
+# for 3D projection to work with matplotlib < 3.2
+
 
 # %%
 # Generate data
@@ -41,7 +42,6 @@
 #
 # We start by generating the Swiss Roll dataset.
 
-from sklearn.datasets import make_swiss_roll
 
 n_samples = 1500
 noise = 0.05
diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py
index d4798d828b321..669e817cbf81d 100644
--- a/examples/compose/plot_column_transformer.py
+++ b/examples/compose/plot_column_transformer.py
@@ -24,14 +24,14 @@
 
 import numpy as np
 
-from sklearn.preprocessing import FunctionTransformer
+from sklearn.compose import ColumnTransformer
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction import DictVectorizer
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics import classification_report
 from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import FunctionTransformer
 from sklearn.svm import LinearSVC
 
 ##############################################################################
diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 2a801405fc1c3..3b6d648a80f27 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -33,11 +33,11 @@
 
 from sklearn.compose import ColumnTransformer
 from sklearn.datasets import fetch_openml
-from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.model_selection import GridSearchCV, train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
 
 np.random.seed(0)
diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py
index cd2c65021c4d4..bbcb9213da0f6 100644
--- a/examples/compose/plot_compare_reduction.py
+++ b/examples/compose/plot_compare_reduction.py
@@ -27,14 +27,15 @@
 
 # Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import load_digits
+from sklearn.decomposition import NMF, PCA
+from sklearn.feature_selection import SelectKBest, chi2
 from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 from sklearn.svm import LinearSVC
-from sklearn.decomposition import PCA, NMF
-from sklearn.feature_selection import SelectKBest, chi2
 
 pipe = Pipeline(
     [
@@ -98,9 +99,10 @@
 # cache. Hence, use the ``memory`` constructor parameter when the fitting
 # of a transformer is costly.
 
-from joblib import Memory
 from shutil import rmtree
 
+from joblib import Memory
+
 # Create a temporary folder to store the transformers of the pipeline
 location = "cachedir"
 memory = Memory(location=location, verbose=10)
diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py
index acd3068d991c9..f81b377cee759 100644
--- a/examples/compose/plot_digits_pipe.py
+++ b/examples/compose/plot_digits_pipe.py
@@ -15,15 +15,15 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
 
 from sklearn import datasets
 from sklearn.decomposition import PCA
 from sklearn.linear_model import LogisticRegression
-from sklearn.pipeline import Pipeline
 from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 
 # Define a pipeline to search for the best combination of PCA truncation
diff --git a/examples/compose/plot_feature_union.py b/examples/compose/plot_feature_union.py
index e014b8b8808b9..01f7e02bfe44f 100644
--- a/examples/compose/plot_feature_union.py
+++ b/examples/compose/plot_feature_union.py
@@ -20,12 +20,12 @@
 #
 # License: BSD 3 clause
 
-from sklearn.pipeline import Pipeline, FeatureUnion
-from sklearn.model_selection import GridSearchCV
-from sklearn.svm import SVC
 from sklearn.datasets import load_iris
 from sklearn.decomposition import PCA
 from sklearn.feature_selection import SelectKBest
+from sklearn.model_selection import GridSearchCV
+from sklearn.pipeline import FeatureUnion, Pipeline
+from sklearn.svm import SVC
 
 iris = load_iris()
diff --git a/examples/compose/plot_transformed_target.py b/examples/compose/plot_transformed_target.py
index 2454affb349cf..20de60f2a01bb 100644
--- a/examples/compose/plot_transformed_target.py
+++ b/examples/compose/plot_transformed_target.py
@@ -15,14 +15,14 @@
 # Author: Guillaume Lemaitre
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
+from sklearn.compose import TransformedTargetRegressor
 from sklearn.datasets import make_regression
-from sklearn.model_selection import train_test_split
 from sklearn.linear_model import RidgeCV
-from sklearn.compose import TransformedTargetRegressor
 from sklearn.metrics import median_absolute_error, r2_score
+from sklearn.model_selection import train_test_split
 
 # %%
 # Synthetic example
diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index be3bf4837eb9f..df9af8ea330ba 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -37,9 +37,10 @@
 # Compute the likelihood on test data
 # -----------------------------------
 
-from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood
 from scipy import linalg
 
+from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood
+
 # spanning a range of possible shrinkage coefficient values
 shrinkages = np.logspace(-2, 0, 30)
 negative_logliks = [
@@ -73,8 +74,8 @@
 # are Gaussian, in particular for small samples.
+from sklearn.covariance import OAS, LedoitWolf
 from sklearn.model_selection import GridSearchCV
-from sklearn.covariance import LedoitWolf, OAS
 
 # GridSearch for an optimal shrinkage coefficient
 tuned_parameters = [{"shrinkage": shrinkages}]
diff --git a/examples/covariance/plot_lw_vs_oas.py b/examples/covariance/plot_lw_vs_oas.py
index 1fd84b180f50a..107f6bd1c29cc 100644
--- a/examples/covariance/plot_lw_vs_oas.py
+++ b/examples/covariance/plot_lw_vs_oas.py
@@ -21,11 +21,11 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
-from scipy.linalg import toeplitz, cholesky
+import numpy as np
+from scipy.linalg import cholesky, toeplitz
 
-from sklearn.covariance import LedoitWolf, OAS
+from sklearn.covariance import OAS, LedoitWolf
 
 np.random.seed(0)
 # %%
diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py
index b93d68a269706..bd61e5af22147 100644
--- a/examples/covariance/plot_mahalanobis_distances.py
+++ b/examples/covariance/plot_mahalanobis_distances.py
@@ -103,6 +103,7 @@
 # designed to have a much larger variance in feature 2.
 
 import matplotlib.pyplot as plt
+
 from sklearn.covariance import EmpiricalCovariance, MinCovDet
 
 # fit a MCD robust estimator to data
diff --git a/examples/covariance/plot_robust_vs_empirical_covariance.py b/examples/covariance/plot_robust_vs_empirical_covariance.py
index 9cffa57beda0a..c543eeac16ba4 100644
--- a/examples/covariance/plot_robust_vs_empirical_covariance.py
+++ b/examples/covariance/plot_robust_vs_empirical_covariance.py
@@ -53,9 +53,9 @@
 
 """
 
-import numpy as np
-import matplotlib.pyplot as plt
 import matplotlib.font_manager
+import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.covariance import EmpiricalCovariance, MinCovDet
 
diff --git a/examples/covariance/plot_sparse_cov.py b/examples/covariance/plot_sparse_cov.py
index 96a5486dc964e..a088aeb7e69c0 100644
--- a/examples/covariance/plot_sparse_cov.py
+++ b/examples/covariance/plot_sparse_cov.py
@@ -59,6 +59,7 @@
 # -----------------
 import numpy as np
 from scipy import linalg
+
 from sklearn.datasets import make_sparse_spd_matrix
 
 n_samples = 60
diff --git a/examples/cross_decomposition/plot_compare_cross_decomposition.py b/examples/cross_decomposition/plot_compare_cross_decomposition.py
index deccd7aa1932c..866c5b99584e5 100644
--- a/examples/cross_decomposition/plot_compare_cross_decomposition.py
+++ b/examples/cross_decomposition/plot_compare_cross_decomposition.py
@@ -20,9 +20,10 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA
+import numpy as np
+
+from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression
 
 # #############################################################################
 # Dataset based latent variables model
diff --git a/examples/cross_decomposition/plot_pcr_vs_pls.py b/examples/cross_decomposition/plot_pcr_vs_pls.py
index 529225d11eead..9f3b5a7cf0460 100644
--- a/examples/cross_decomposition/plot_pcr_vs_pls.py
+++ b/examples/cross_decomposition/plot_pcr_vs_pls.py
@@ -33,6 +33,8 @@
 
 """
 
+import matplotlib.pyplot as plt
+
 # %%
 # The data
 # --------
@@ -42,7 +44,7 @@
 # components of this dataset, i.e. the two directions that explain the most
 # variance in the data.
 import numpy as np
-import matplotlib.pyplot as plt
+
 from sklearn.decomposition import PCA
 
 rng = np.random.RandomState(0)
@@ -87,6 +89,10 @@
 plt.tight_layout()
 plt.show()
 
+from sklearn.cross_decomposition import PLSRegression
+from sklearn.decomposition import PCA
+from sklearn.linear_model import LinearRegression
+
 # %%
 # Projection on one component and predictive power
 # ------------------------------------------------
@@ -101,10 +107,7 @@
 # use as training data.
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
-from sklearn.linear_model import LinearRegression
 from sklearn.preprocessing import StandardScaler
-from sklearn.decomposition import PCA
-from sklearn.cross_decomposition import PLSRegression
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py
index a148dd37626e0..8519b0e70d1de 100644
--- a/examples/datasets/plot_digits_last_image.py
+++ b/examples/datasets/plot_digits_last_image.py
@@ -19,10 +19,10 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause
 
-from sklearn import datasets
-
 import matplotlib.pyplot as plt
 
+from sklearn import datasets
+
 # Load the digits dataset
 digits = datasets.load_digits()
diff --git a/examples/datasets/plot_random_dataset.py b/examples/datasets/plot_random_dataset.py
index 4f3fdbbb11ef5..e5cbdb080b59f 100644
--- a/examples/datasets/plot_random_dataset.py
+++ b/examples/datasets/plot_random_dataset.py
@@ -16,9 +16,7 @@
 
 import matplotlib.pyplot as plt
 
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_blobs
-from sklearn.datasets import make_gaussian_quantiles
+from sklearn.datasets import make_blobs, make_classification, make_gaussian_quantiles
 
 plt.figure(figsize=(8, 8))
 plt.subplots_adjust(bottom=0.05, top=0.9, left=0.05, right=0.95)
diff --git a/examples/datasets/plot_random_multilabel_dataset.py b/examples/datasets/plot_random_multilabel_dataset.py
index f22c7b9695c42..e6e2d6ad9edcf 100644
--- a/examples/datasets/plot_random_multilabel_dataset.py
+++ b/examples/datasets/plot_random_multilabel_dataset.py
@@ -35,8 +35,8 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.datasets import make_multilabel_classification as make_ml_clf
 
diff --git a/examples/decomposition/plot_beta_divergence.py b/examples/decomposition/plot_beta_divergence.py
index 2a69f9a22ffb4..e06bde0a83e10 100644
--- a/examples/decomposition/plot_beta_divergence.py
+++ b/examples/decomposition/plot_beta_divergence.py
@@ -8,8 +8,9 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.decomposition._nmf import _beta_divergence
 
 x = np.linspace(0.001, 4, 1000)
diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py
index 0eb07dc3efb2d..54cb3fa9faac7 100644
--- a/examples/decomposition/plot_faces_decomposition.py
+++ b/examples/decomposition/plot_faces_decomposition.py
@@ -21,12 +21,11 @@
 
 import logging
 
-from numpy.random import RandomState
 import matplotlib.pyplot as plt
+from numpy.random import RandomState
 
+from sklearn import cluster, decomposition
 from sklearn.datasets import fetch_olivetti_faces
-from sklearn import cluster
-from sklearn import decomposition
 
 rng = RandomState(0)
diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py
index 15945e5075ce8..2afd204a6dbdd 100644
--- a/examples/decomposition/plot_ica_blind_source_separation.py
+++ b/examples/decomposition/plot_ica_blind_source_separation.py
@@ -14,11 +14,11 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from scipy import signal
 
-from sklearn.decomposition import FastICA, PCA
+from sklearn.decomposition import PCA, FastICA
 
 # #############################################################################
 # Generate sample data
diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py
index 71019be17b5ba..8d7ee613041e9 100644
--- a/examples/decomposition/plot_image_denoising.py
+++ b/examples/decomposition/plot_image_denoising.py
@@ -38,7 +38,6 @@
 
 import numpy as np
 import scipy as sp
-
 try:  # SciPy >= 0.16 have face in misc
     from scipy.misc import face
diff --git a/examples/decomposition/plot_incremental_pca.py b/examples/decomposition/plot_incremental_pca.py
index adc7f83f3cda0..8e5aeccfddc8a 100644
--- a/examples/decomposition/plot_incremental_pca.py
+++ b/examples/decomposition/plot_incremental_pca.py
@@ -22,8 +22,8 @@
 # Authors: Kyle Kastner
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.datasets import load_iris
 from sklearn.decomposition import PCA, IncrementalPCA
diff --git a/examples/decomposition/plot_pca_3d.py b/examples/decomposition/plot_pca_3d.py
index e539af6d66b7a..93a35fb1a3762 100644
--- a/examples/decomposition/plot_pca_3d.py
+++ b/examples/decomposition/plot_pca_3d.py
@@ -20,7 +20,6 @@
 # ---------------
 
 import numpy as np
-
 from scipy import stats
 
 e = np.exp(1)
@@ -53,13 +52,13 @@ def pdf(x):
 # Plot the figures
 # ----------------
 
-from sklearn.decomposition import PCA
-
 import matplotlib.pyplot as plt
 
 # unused but required import for doing 3d projections with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401
 
+from sklearn.decomposition import PCA
+
 
 def plot_figs(fig_num, elev, azim):
     fig = plt.figure(fig_num, figsize=(4, 3))
diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py
index e42bf7cf91d7e..13789479c6af3 100644
--- a/examples/decomposition/plot_pca_iris.py
+++ b/examples/decomposition/plot_pca_iris.py
@@ -14,12 +14,10 @@
 # Code source: Gaël Varoquaux
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
-
-from sklearn import decomposition
-from sklearn import datasets
+from sklearn import datasets, decomposition
 
 np.random.seed(5)
diff --git a/examples/decomposition/plot_pca_vs_fa_model_selection.py b/examples/decomposition/plot_pca_vs_fa_model_selection.py
index 4c934ab756c3e..e269fc6b5c278 100644
--- a/examples/decomposition/plot_pca_vs_fa_model_selection.py
+++ b/examples/decomposition/plot_pca_vs_fa_model_selection.py
@@ -34,7 +34,6 @@
 # ---------------
 
 import numpy as np
-
 from scipy import linalg
 
 n_samples, n_features, rank = 500, 25, 5
@@ -56,10 +55,9 @@
 
 import matplotlib.pyplot as plt
 
+from sklearn.covariance import LedoitWolf, ShrunkCovariance
 from sklearn.decomposition import PCA, FactorAnalysis
-from sklearn.covariance import ShrunkCovariance, LedoitWolf
-from sklearn.model_selection import cross_val_score
-from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import GridSearchCV, cross_val_score
 
 n_components = np.arange(0, n_features, 5)  # options for n_components
diff --git a/examples/decomposition/plot_sparse_coding.py b/examples/decomposition/plot_sparse_coding.py
index 4f4602f1ff1ac..c45cd3c83b04f 100644
--- a/examples/decomposition/plot_sparse_coding.py
+++ b/examples/decomposition/plot_sparse_coding.py
@@ -16,8 +16,8 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.decomposition import SparseCoder
 
diff --git a/examples/decomposition/plot_varimax_fa.py b/examples/decomposition/plot_varimax_fa.py
index 0103ccc65b5d6..97d6cf8fdf54b 100644
--- a/examples/decomposition/plot_varimax_fa.py
+++ b/examples/decomposition/plot_varimax_fa.py
@@ -22,9 +22,9 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-from sklearn.decomposition import FactorAnalysis, PCA
-from sklearn.preprocessing import StandardScaler
 from sklearn.datasets import load_iris
+from sklearn.decomposition import PCA, FactorAnalysis
+from sklearn.preprocessing import StandardScaler
 
 # %%
 # Load Iris data
diff --git a/examples/ensemble/plot_adaboost_hastie_10_2.py b/examples/ensemble/plot_adaboost_hastie_10_2.py
index 4a61639cb2494..c2c791aa6ff67 100644
--- a/examples/ensemble/plot_adaboost_hastie_10_2.py
+++ b/examples/ensemble/plot_adaboost_hastie_10_2.py
@@ -94,6 +94,7 @@
 # added to the ensemble.
 
 import numpy as np
+
 from sklearn.metrics import zero_one_loss
 
 ada_discrete_err = np.zeros((n_estimators,))
diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py
index c94cc94959576..3f7af092b0a6c 100644
--- a/examples/ensemble/plot_adaboost_multiclass.py
+++ b/examples/ensemble/plot_adaboost_multiclass.py
@@ -35,7 +35,6 @@
 from sklearn.metrics import accuracy_score
 from sklearn.tree import DecisionTreeClassifier
 
-
 X, y = make_gaussian_quantiles(
     n_samples=13000, n_features=10, n_classes=3, random_state=1
 )
diff --git a/examples/ensemble/plot_adaboost_regression.py b/examples/ensemble/plot_adaboost_regression.py
index e60051d1c25a0..65f0879c40804 100644
--- a/examples/ensemble/plot_adaboost_regression.py
+++ b/examples/ensemble/plot_adaboost_regression.py
@@ -17,11 +17,13 @@
 #
 # License: BSD 3 clause
 
+import matplotlib.pyplot as plt
+
 # importing necessary libraries
 import numpy as np
-import matplotlib.pyplot as plt
-from sklearn.tree import DecisionTreeRegressor
+
 from sklearn.ensemble import AdaBoostRegressor
+from sklearn.tree import DecisionTreeRegressor
 
 # Create the dataset
 rng = np.random.RandomState(1)
diff --git a/examples/ensemble/plot_adaboost_twoclass.py b/examples/ensemble/plot_adaboost_twoclass.py
index 19679c6285d3b..d1e89c47b7fcf 100644
--- a/examples/ensemble/plot_adaboost_twoclass.py
+++ b/examples/ensemble/plot_adaboost_twoclass.py
@@ -21,14 +21,13 @@
 #
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn.ensemble import AdaBoostClassifier
-from sklearn.tree import DecisionTreeClassifier
 from sklearn.datasets import make_gaussian_quantiles
+from sklearn.ensemble import AdaBoostClassifier
 from sklearn.inspection import DecisionBoundaryDisplay
-
+from sklearn.tree import DecisionTreeClassifier
 
 # Construct dataset
 X1, y1 = make_gaussian_quantiles(
diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py
index f8868a7003e4c..3a8909436fa1c 100644
--- a/examples/ensemble/plot_bias_variance.py
+++ b/examples/ensemble/plot_bias_variance.py
@@ -66,8 +66,8 @@
 # Author: Gilles Louppe
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.ensemble import BaggingRegressor
 from sklearn.tree import DecisionTreeRegressor
diff --git a/examples/ensemble/plot_ensemble_oob.py b/examples/ensemble/plot_ensemble_oob.py
index bd678af42a7d1..972ca1f6259aa 100644
--- a/examples/ensemble/plot_ensemble_oob.py
+++ b/examples/ensemble/plot_ensemble_oob.py
@@ -26,9 +26,10 @@
 #
 # License: BSD 3 Clause
 
+from collections import OrderedDict
+
 import matplotlib.pyplot as plt
 
-from collections import OrderedDict
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
 
diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py
index 409396a0376b8..45db0afb2ec4d 100644
--- a/examples/ensemble/plot_feature_transformation.py
+++ b/examples/ensemble/plot_feature_transformation.py
@@ -59,7 +59,7 @@
 # First, we will start by training the random forest and gradient boosting on
 # the separated training set
 
-from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
 
 random_forest = RandomForestClassifier(
     n_estimators=n_estimators, max_depth=max_depth, random_state=10
@@ -100,8 +100,7 @@
 # method `apply`. The pipeline in scikit-learn expects a call to `transform`.
 # Therefore, we wrapped the call to `apply` within a `FunctionTransformer`.
 
-from sklearn.preprocessing import FunctionTransformer
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import FunctionTransformer, OneHotEncoder
 
 
 def rf_apply(X, model):
@@ -138,6 +137,7 @@ def gbdt_apply(X, model):
 # We can finally show the different ROC curves for all the models.
 
 import matplotlib.pyplot as plt
+
 from sklearn.metrics import RocCurveDisplay
 
 fig, ax = plt.subplots()
diff --git a/examples/ensemble/plot_forest_importances.py b/examples/ensemble/plot_forest_importances.py
index fbda63b26faee..269451168dd7a 100644
--- a/examples/ensemble/plot_forest_importances.py
+++ b/examples/ensemble/plot_forest_importances.py
@@ -57,6 +57,7 @@
 # cardinality** features (many unique values). See
 # :ref:`permutation_importance` as an alternative below.
 import time
+
 import numpy as np
 
 start_time = time.time()
diff --git a/examples/ensemble/plot_forest_importances_faces.py b/examples/ensemble/plot_forest_importances_faces.py
index 3848873c297de..8b8e8751ec5a2 100644
--- a/examples/ensemble/plot_forest_importances_faces.py
+++ b/examples/ensemble/plot_forest_importances_faces.py
@@ -59,6 +59,7 @@
 # cardinality** features (many unique values). See
 # :ref:`permutation_importance` as an alternative.
 import time
+
 import matplotlib.pyplot as plt
 
 start_time = time.time()
diff --git a/examples/ensemble/plot_forest_iris.py b/examples/ensemble/plot_forest_iris.py
index ee414db7125dc..6aaceea88efd2 100644
--- a/examples/ensemble/plot_forest_iris.py
+++ b/examples/ensemble/plot_forest_iris.py
@@ -42,15 +42,15 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.colors import ListedColormap
 
 from sklearn.datasets import load_iris
 from sklearn.ensemble import (
-    RandomForestClassifier,
-    ExtraTreesClassifier,
     AdaBoostClassifier,
+    ExtraTreesClassifier,
+    RandomForestClassifier,
 )
 from sklearn.tree import DecisionTreeClassifier
 
diff --git a/examples/ensemble/plot_gradient_boosting_categorical.py b/examples/ensemble/plot_gradient_boosting_categorical.py
index 6eca645654086..2b01218d19130 100644
--- a/examples/ensemble/plot_gradient_boosting_categorical.py
+++ b/examples/ensemble/plot_gradient_boosting_categorical.py
@@ -76,10 +76,9 @@
 # As a baseline, we create an estimator where the categorical features are
 # dropped:
 
+from sklearn.compose import make_column_selector, make_column_transformer
 from sklearn.ensemble import HistGradientBoostingRegressor
 from sklearn.pipeline import make_pipeline
-from sklearn.compose import make_column_transformer
-from sklearn.compose import make_column_selector
 
 dropper = make_column_transformer(
     ("drop", make_column_selector(dtype_include="category")), remainder="passthrough"
@@ -113,9 +112,10 @@
 # were ordered quantities, i.e. the categories will be encoded as 0, 1, 2,
 # etc., and treated as continuous features.
 
-from sklearn.preprocessing import OrdinalEncoder
 import numpy as np
 
+from sklearn.preprocessing import OrdinalEncoder
+
 ordinal_encoder = make_column_transformer(
     (
         OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan),
@@ -161,9 +161,10 @@
 # models performance in terms of
 # :func:`~metrics.mean_absolute_percentage_error` and fit times.
 
-from sklearn.model_selection import cross_validate
 import matplotlib.pyplot as plt
 
+from sklearn.model_selection import cross_validate
+
 scoring = "neg_mean_absolute_percentage_error"
 n_cv_folds = 3
diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py
index 6f1013eed9564..f271f80a07c55 100644
--- a/examples/ensemble/plot_gradient_boosting_early_stopping.py
+++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -38,11 +38,10 @@
 
 import time
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn import ensemble
-from sklearn import datasets
+from sklearn import datasets, ensemble
 from sklearn.model_selection import train_test_split
 
 data_list = [
diff --git a/examples/ensemble/plot_gradient_boosting_oob.py b/examples/ensemble/plot_gradient_boosting_oob.py
index 8182eafc2969a..5a3a5b6fbf11a 100644
--- a/examples/ensemble/plot_gradient_boosting_oob.py
+++ b/examples/ensemble/plot_gradient_boosting_oob.py
@@ -29,14 +29,12 @@
 #
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+from scipy.special import expit
 
 from sklearn import ensemble
-from sklearn.model_selection import KFold
-from sklearn.model_selection import train_test_split
-
-from scipy.special import expit
+from sklearn.model_selection import KFold, train_test_split
 
 # Generate data (adapted from G. Ridgeway's gbm example)
 n_samples = 1000
diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index 9e823439b948b..37baf7639c4aa 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -12,6 +12,7 @@
 # Generate some data for a synthetic regression problem by applying the
 # function f to uniformly sampled random inputs.
 import numpy as np
+
 from sklearn.model_selection import train_test_split
 
 
@@ -58,7 +59,6 @@ def f(x):
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.metrics import mean_pinball_loss, mean_squared_error
 
-
 all_models = {}
 common_params = dict(
     learning_rate=0.05,
@@ -88,7 +88,6 @@ def f(x):
 # 90% interval (from 5th to 95th conditional percentiles).
 import matplotlib.pyplot as plt
 
-
 y_pred = all_models["mse"].predict(xx)
 y_lower = all_models["q 0.05"].predict(xx)
 y_upper = all_models["q 0.95"].predict(xx)
@@ -231,11 +230,12 @@ def coverage_fraction(y, y_low, y_high):
 # of the 5th percentile by selecting the best model parameters by
 # cross-validation on the pinball loss with alpha=0.05:
 
+from pprint import pprint
+
 # %%
 from sklearn.experimental import enable_halving_search_cv  # noqa
-from sklearn.model_selection import HalvingRandomSearchCV
 from sklearn.metrics import make_scorer
-from pprint import pprint
+from sklearn.model_selection import HalvingRandomSearchCV
 
 param_grid = dict(
     learning_rate=[0.05, 0.1, 0.2],
diff --git a/examples/ensemble/plot_gradient_boosting_regression.py b/examples/ensemble/plot_gradient_boosting_regression.py
index dc29bfbda8f77..dacfea9a4d39a 100644
--- a/examples/ensemble/plot_gradient_boosting_regression.py
+++ b/examples/ensemble/plot_gradient_boosting_regression.py
@@ -23,6 +23,7 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn import datasets, ensemble
 from sklearn.inspection import permutation_importance
 from sklearn.metrics import mean_squared_error
diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py
index 28190c16b631e..45924dd611f6b 100644
--- a/examples/ensemble/plot_gradient_boosting_regularization.py
+++ b/examples/ensemble/plot_gradient_boosting_regularization.py
@@ -25,12 +25,10 @@
 #
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn import ensemble
-from sklearn import datasets
-
+from sklearn import datasets, ensemble
 from sklearn.model_selection import train_test_split
 
 X, y = datasets.make_hastie_10_2(n_samples=4000, random_state=1)
diff --git a/examples/ensemble/plot_isolation_forest.py b/examples/ensemble/plot_isolation_forest.py
index 5ffe9eb799ac9..587f62fb14ba0 100644
--- a/examples/ensemble/plot_isolation_forest.py
+++ b/examples/ensemble/plot_isolation_forest.py
@@ -23,8 +23,9 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.ensemble import IsolationForest
 
 rng = np.random.RandomState(42)
diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py
index fc1ced458b09e..0076ec0115584 100644
--- a/examples/ensemble/plot_monotonic_constraints.py
+++ b/examples/ensemble/plot_monotonic_constraints.py
@@ -20,11 +20,11 @@
 
 """
 
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.inspection import PartialDependenceDisplay
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.inspection import PartialDependenceDisplay
 
 rng = np.random.RandomState(0)
diff --git a/examples/ensemble/plot_random_forest_embedding.py b/examples/ensemble/plot_random_forest_embedding.py
index 000b83e67b92a..fe26e04ca7789 100644
--- a/examples/ensemble/plot_random_forest_embedding.py
+++ b/examples/ensemble/plot_random_forest_embedding.py
@@ -26,12 +26,12 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.datasets import make_circles
-from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
 from sklearn.decomposition import TruncatedSVD
+from sklearn.ensemble import ExtraTreesClassifier, RandomTreesEmbedding
 from sklearn.naive_bayes import BernoulliNB
 
 # make a synthetic dataset
diff --git a/examples/ensemble/plot_random_forest_regression_multioutput.py b/examples/ensemble/plot_random_forest_regression_multioutput.py
index 4b3d4f4a9a728..ce8346c329127 100644
--- a/examples/ensemble/plot_random_forest_regression_multioutput.py
+++ b/examples/ensemble/plot_random_forest_regression_multioutput.py
@@ -25,13 +25,13 @@
 #
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import train_test_split
 from sklearn.multioutput import MultiOutputRegressor
 
-
 # Create a random dataset
 rng = np.random.RandomState(1)
 X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
diff --git a/examples/ensemble/plot_stack_predictors.py b/examples/ensemble/plot_stack_predictors.py
index a311f5966880c..c6da319e83a7c 100644
--- a/examples/ensemble/plot_stack_predictors.py
+++ b/examples/ensemble/plot_stack_predictors.py
@@ -132,8 +132,7 @@ def load_ames_housing():
 # Then, we will now define the preprocessor used when the ending regressor
 # is a linear model.
 
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
 
 cat_linear_processor = OneHotEncoder(handle_unknown="ignore")
 num_linear_processor = make_pipeline(
@@ -210,8 +209,10 @@ def load_ames_housing():
 
 import time
+
 import matplotlib.pyplot as plt
-from sklearn.model_selection import cross_validate, cross_val_predict
+
+from sklearn.model_selection import cross_val_predict, cross_validate
 
 
 def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):
diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index e6dc68eeadf98..90441c6d28339 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -28,11 +28,11 @@
 import matplotlib.pyplot as plt
 
 from sklearn import datasets
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.svm import SVC
 from sklearn.ensemble import VotingClassifier
 from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
 
 # Loading some example data
 iris = datasets.load_iris()
diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py
index 54c290c3073e0..14f4f4330c045 100644
--- a/examples/ensemble/plot_voting_probas.py
+++ b/examples/ensemble/plot_voting_probas.py
@@ -23,13 +23,12 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
+from sklearn.ensemble import RandomForestClassifier, VotingClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.naive_bayes import GaussianNB
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import VotingClassifier
 
 clf1 = LogisticRegression(max_iter=1000, random_state=123)
 clf2 = RandomForestClassifier(n_estimators=100, random_state=123)
diff --git a/examples/ensemble/plot_voting_regressor.py b/examples/ensemble/plot_voting_regressor.py
index 23e709cc9e62a..d33becca505e3 100644
--- a/examples/ensemble/plot_voting_regressor.py
+++ b/examples/ensemble/plot_voting_regressor.py
@@ -26,10 +26,12 @@
 import matplotlib.pyplot as plt
 
 from sklearn.datasets import load_diabetes
-from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import (
+    GradientBoostingRegressor,
+    RandomForestRegressor,
+    VotingRegressor,
+)
 from sklearn.linear_model import LinearRegression
-from sklearn.ensemble import VotingRegressor
 
 # %%
 # Training classifiers
diff --git a/examples/exercises/plot_cv_digits.py b/examples/exercises/plot_cv_digits.py
index e43bbd86bb027..ebad3a55098b5 100644
--- a/examples/exercises/plot_cv_digits.py
+++ b/examples/exercises/plot_cv_digits.py
@@ -11,8 +11,9 @@
 
 """
 
 import numpy as np
-from sklearn.model_selection import cross_val_score
+
 from sklearn import datasets, svm
+from sklearn.model_selection import cross_val_score
 
 X, y = datasets.load_digits(return_X_y=True)
diff --git a/examples/exercises/plot_digits_classification_exercise.py b/examples/exercises/plot_digits_classification_exercise.py
index 877e615659743..25b0171c66421 100644
--- a/examples/exercises/plot_digits_classification_exercise.py
+++ b/examples/exercises/plot_digits_classification_exercise.py
@@ -12,7 +12,7 @@
 
 """
 
-from sklearn import datasets, neighbors, linear_model
+from sklearn import datasets, linear_model, neighbors
 
 X_digits, y_digits = datasets.load_digits(return_X_y=True)
 X_digits = X_digits / X_digits.max()
diff --git a/examples/exercises/plot_iris_exercise.py b/examples/exercises/plot_iris_exercise.py
index 74da8c27889c9..07687b920e1b8 100644
--- a/examples/exercises/plot_iris_exercise.py
+++ b/examples/exercises/plot_iris_exercise.py
@@ -10,8 +10,9 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets, svm
 
 iris = datasets.load_iris()
diff --git a/examples/feature_selection/plot_f_test_vs_mi.py b/examples/feature_selection/plot_f_test_vs_mi.py
index a8cfc5d426bbc..d6b5d13da6042 100644
--- a/examples/feature_selection/plot_f_test_vs_mi.py
+++ b/examples/feature_selection/plot_f_test_vs_mi.py
@@ -23,8 +23,9 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.feature_selection import f_regression, mutual_info_regression
 
 np.random.seed(0)
diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py
index 33ac2bd558dc5..2cf64cb6ea598 100644
--- a/examples/feature_selection/plot_feature_selection.py
+++ b/examples/feature_selection/plot_feature_selection.py
@@ -21,6 +21,7 @@
 # --------------------
 #
 import numpy as np
+
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
 
diff --git a/examples/feature_selection/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py
index 9684f5fabd383..553f38f9c674f 100644
--- a/examples/feature_selection/plot_rfe_digits.py
+++ b/examples/feature_selection/plot_rfe_digits.py
@@ -12,10 +12,11 @@
 
 """  # noqa: E501
 
-from sklearn.svm import SVC
+import matplotlib.pyplot as plt
+
 from sklearn.datasets import load_digits
 from sklearn.feature_selection import RFE
-import matplotlib.pyplot as plt
+from sklearn.svm import SVC
 
 # Load the digits dataset
 digits = load_digits()
diff --git a/examples/feature_selection/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py
index 4ebd4b55730c8..f44f086f96842 100644
--- a/examples/feature_selection/plot_rfe_with_cross_validation.py
+++ b/examples/feature_selection/plot_rfe_with_cross_validation.py
@@ -9,10 +9,11 @@
 
 """
 
 import matplotlib.pyplot as plt
-from sklearn.svm import SVC
-from sklearn.model_selection import StratifiedKFold
-from sklearn.feature_selection import RFECV
+
 from sklearn.datasets import make_classification
+from sklearn.feature_selection import RFECV
+from sklearn.model_selection import StratifiedKFold
+from sklearn.svm import SVC
 
 # Build a classification task using 3 informative features
 X, y = make_classification(
diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py
index 16f63868feae0..34764d72c7c35 100644
--- a/examples/feature_selection/plot_select_from_model_diabetes.py
+++ b/examples/feature_selection/plot_select_from_model_diabetes.py
@@ -46,6 +46,7 @@
 # :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`.
 import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn.linear_model import RidgeCV
 
 ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5)).fit(X, y)
@@ -55,6 +56,8 @@
 plt.title("Feature importances via coefficients")
 plt.show()
 
+from time import time
+
 # %%
 # Selecting features based on importance
 # --------------------------------------
@@ -68,7 +71,6 @@
 # Since we want to select only 2 features, we will set this threshold slightly
 # above the coefficient of third most important feature.
 from sklearn.feature_selection import SelectFromModel
-from time import time
 
 threshold = np.sort(importance)[-3] + 0.01
diff --git a/examples/gaussian_process/plot_compare_gpr_krr.py b/examples/gaussian_process/plot_compare_gpr_krr.py
index 42c013523f79c..a6136506341ee 100644
--- a/examples/gaussian_process/plot_compare_gpr_krr.py
+++ b/examples/gaussian_process/plot_compare_gpr_krr.py
@@ -125,6 +125,7 @@
 #
 # Thus, let's use such a :class:`~sklearn.kernel_ridge.KernelRidge`.
 import time
+
 from sklearn.gaussian_process.kernels import ExpSineSquared
 from sklearn.kernel_ridge import KernelRidge
 
diff --git a/examples/gaussian_process/plot_gpc.py b/examples/gaussian_process/plot_gpc.py
index e2d78fa23f09e..21a99065e06ce 100644
--- a/examples/gaussian_process/plot_gpc.py
+++ b/examples/gaussian_process/plot_gpc.py
@@ -27,13 +27,11 @@
 # License: BSD 3 clause
 
 import numpy as np
-
 from matplotlib import pyplot as plt
 
-from sklearn.metrics import accuracy_score, log_loss
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF
-
+from sklearn.metrics import accuracy_score, log_loss
 
 # Generate data
 train_size = 50
diff --git a/examples/gaussian_process/plot_gpc_iris.py b/examples/gaussian_process/plot_gpc_iris.py
index ce0ed066a1377..88c536d8824c8 100644
--- a/examples/gaussian_process/plot_gpc_iris.py
+++ b/examples/gaussian_process/plot_gpc_iris.py
@@ -10,8 +10,9 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF
diff --git a/examples/gaussian_process/plot_gpc_isoprobability.py b/examples/gaussian_process/plot_gpc_isoprobability.py
index 84f1ecb98bd3c..10785b279f1f0 100644
--- a/examples/gaussian_process/plot_gpc_isoprobability.py
+++ b/examples/gaussian_process/plot_gpc_isoprobability.py
@@ -15,12 +15,12 @@
 # License: BSD 3 clause
 
 import numpy as np
-
-from matplotlib import pyplot as plt
 from matplotlib import cm
+from matplotlib import pyplot as plt
 
 from sklearn.gaussian_process import GaussianProcessClassifier
-from sklearn.gaussian_process.kernels import DotProduct, ConstantKernel as C
+from sklearn.gaussian_process.kernels import ConstantKernel as C
+from sklearn.gaussian_process.kernels import DotProduct
 
 # A few constants
 lim = 8
diff --git a/examples/gaussian_process/plot_gpc_xor.py b/examples/gaussian_process/plot_gpc_xor.py
index 6eebbcf80098e..d963ce6aaea40 100644
--- a/examples/gaussian_process/plot_gpc_xor.py
+++ b/examples/gaussian_process/plot_gpc_xor.py
@@ -15,13 +15,12 @@
 #
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF, DotProduct
 
-
 xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50))
 rng = np.random.RandomState(0)
 X = rng.randn(200, 2)
diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py
index bfc1c21631b26..a3acd1dbfcbd3 100644
--- a/examples/gaussian_process/plot_gpr_co2.py
+++ b/examples/gaussian_process/plot_gpr_co2.py
@@ -172,6 +172,7 @@
 # Thus, we create synthetic data from 1958 to the current month. In addition,
 # we need to add the subtracted mean computed during training.
 import datetime
+
 import numpy as np
 
 today = datetime.datetime.now()
diff --git a/examples/gaussian_process/plot_gpr_on_structured_data.py b/examples/gaussian_process/plot_gpr_on_structured_data.py
index bc8c169c91f67..58d567b52781f 100644
--- a/examples/gaussian_process/plot_gpr_on_structured_data.py
+++ b/examples/gaussian_process/plot_gpr_on_structured_data.py
@@ -38,13 +38,12 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
-from sklearn.gaussian_process.kernels import GenericKernelMixin
-from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process import GaussianProcessClassifier
+import numpy as np
+
 from sklearn.base import clone
+from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import GenericKernelMixin, Hyperparameter, Kernel
 
 
 class SequenceKernel(GenericKernelMixin, Kernel):
diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py
index d83922817e5de..9dc8b6c831710 100644
--- a/examples/impute/plot_iterative_imputer_variants_comparison.py
+++ b/examples/impute/plot_iterative_imputer_variants_comparison.py
@@ -44,21 +44,21 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
 
+from sklearn.datasets import fetch_california_housing
+from sklearn.ensemble import RandomForestRegressor
+
 # To use this experimental feature, we need to explicitly ask for it:
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.datasets import fetch_california_housing
-from sklearn.impute import SimpleImputer
-from sklearn.impute import IterativeImputer
-from sklearn.linear_model import BayesianRidge, Ridge
+from sklearn.impute import IterativeImputer, SimpleImputer
 from sklearn.kernel_approximation import Nystroem
-from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import BayesianRidge, Ridge
+from sklearn.model_selection import cross_val_score
 from sklearn.neighbors import KNeighborsRegressor
 from sklearn.pipeline import make_pipeline
-from sklearn.model_selection import cross_val_score
 
 N_SPLITS = 5
diff --git a/examples/impute/plot_missing_values.py b/examples/impute/plot_missing_values.py
index ca800ba3602b2..70ff87b92f056 100644
--- a/examples/impute/plot_missing_values.py
+++ b/examples/impute/plot_missing_values.py
@@ -44,9 +44,7 @@
 
 import numpy as np
 
-from sklearn.datasets import fetch_california_housing
-from sklearn.datasets import load_diabetes
-
+from sklearn.datasets import fetch_california_housing, load_diabetes
 
 rng = np.random.RandomState(42)
 
@@ -95,11 +93,10 @@ def add_missing_values(X_full, y_full):
 
 # To use the experimental IterativeImputer, we need to explicitly ask for it:
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
+from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer
 from sklearn.model_selection import cross_val_score
 from sklearn.pipeline import make_pipeline
 
-
 N_SPLITS = 4
 
 regressor = RandomForestRegressor(random_state=0)
 
@@ -261,7 +258,6 @@ def get_impute_iterative(X_missing, y_missing):
 
 import matplotlib.pyplot as plt
 
-
 n_bars = len(mses_diabetes)
 xval = np.arange(n_bars)
 
diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py
index 3cc557c64b69c..644cdcfa4a03c 100644
--- a/examples/inspection/plot_linear_model_coefficient_interpretation.py
+++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py
@@ -29,13 +29,16 @@
 
 """
 
+import matplotlib.pyplot as plt
+
 # %%
 import numpy as np
-import scipy as sp
 import pandas as pd
-import matplotlib.pyplot as plt
+import scipy as sp
 import seaborn as sns
 
+from sklearn.datasets import fetch_openml
+
 # %%
 # The dataset: wages
 # ------------------
@@ -44,7 +47,6 @@
 # Note that setting the parameter `as_frame` to True will retrieve the data
 # as a pandas dataframe.
 
-from sklearn.datasets import fetch_openml
 
 survey = fetch_openml(data_id=534, as_frame=True, parser="pandas")
 
@@ -143,9 +145,9 @@
 # To describe the dataset as a linear model we use a ridge regressor
 # with a very small regularization and to model the logarithm of the WAGE.
 
-from sklearn.pipeline import make_pipeline
-from sklearn.linear_model import Ridge
 from sklearn.compose import TransformedTargetRegressor
+from sklearn.linear_model import Ridge
+from sklearn.pipeline import make_pipeline
 
 model = make_pipeline(
     preprocessor,
@@ -307,8 +309,7 @@
 # their robustness is not guaranteed, and they should probably be interpreted
 # with caution.
 
-from sklearn.model_selection import cross_validate
-from sklearn.model_selection import RepeatedKFold
+from sklearn.model_selection import RepeatedKFold, cross_validate
 
 cv = RepeatedKFold(n_splits=5, n_repeats=5, random_state=0)
 cv_model = cross_validate(
diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py
index d7480a7898424..c3b22aec358a1 100644
--- a/examples/inspection/plot_partial_dependence.py
+++ b/examples/inspection/plot_partial_dependence.py
@@ -43,6 +43,7 @@
 # (here the average target, by default).
 
 import pandas as pd
+
 from sklearn.datasets import fetch_california_housing
 from sklearn.model_selection import train_test_split
 
@@ -71,9 +72,10 @@
 # single-variable partial dependence plots.
 
 from time import time
+
+from sklearn.neural_network import MLPRegressor
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import QuantileTransformer
-from sklearn.neural_network import MLPRegressor
 
 print("Training MLPRegressor...")
 tic = time()
@@ -247,6 +249,9 @@
 )
 display.figure_.subplots_adjust(wspace=0.4, hspace=0.3)
 
+# unused but required import for doing 3d projections with matplotlib < 3.2
+import mpl_toolkits.mplot3d  # noqa: F401
+
 # %%
 # The two-way partial dependence plot shows the dependence of median house
 # price on joint values of house age and average occupants per household. We
@@ -262,9 +267,6 @@
 # this time in 3 dimensions.
 import numpy as np
 
-# unused but required import for doing 3d projections with matplotlib < 3.2
-import mpl_toolkits.mplot3d  # noqa: F401
-
 from sklearn.inspection import partial_dependence
 
 fig = plt.figure()
diff --git a/examples/inspection/plot_permutation_importance.py b/examples/inspection/plot_permutation_importance.py
index 9e3724687a306..8840768d7ea39 100644
--- a/examples/inspection/plot_permutation_importance.py
+++ b/examples/inspection/plot_permutation_importance.py
@@ -56,6 +56,8 @@
 X = X[categorical_columns + numerical_columns]
 
 X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
 
+from sklearn.compose import ColumnTransformer
+
 # %%
 # We define a predictive model based on a random forest. Therefore, we will make
 # the following preprocessing steps:
@@ -66,7 +68,6 @@
 #   numerical features using a mean strategy.
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.impute import SimpleImputer
-from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OrdinalEncoder
 
diff --git a/examples/inspection/plot_permutation_importance_multicollinear.py b/examples/inspection/plot_permutation_importance_multicollinear.py
index 59871c00946a6..e14916e808af9 100644
--- a/examples/inspection/plot_permutation_importance_multicollinear.py
+++ b/examples/inspection/plot_permutation_importance_multicollinear.py
@@ -22,9 +22,9 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-from scipy.stats import spearmanr
 from scipy.cluster import hierarchy
 from scipy.spatial.distance import squareform
+from scipy.stats import spearmanr
 
 from sklearn.datasets import load_breast_cancer
 from sklearn.ensemble import RandomForestClassifier
diff --git a/examples/kernel_approximation/plot_scalable_poly_kernels.py b/examples/kernel_approximation/plot_scalable_poly_kernels.py
index ade27e16e349a..5e9126389a5f9 100644
--- a/examples/kernel_approximation/plot_scalable_poly_kernels.py
+++ b/examples/kernel_approximation/plot_scalable_poly_kernels.py
@@ -64,8 +64,8 @@
 # the LIBSVM webpage, and then normalize to unit length as done in the
 # original Tensor Sketch paper [1].
 
-from sklearn.preprocessing import MinMaxScaler, Normalizer
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import MinMaxScaler, Normalizer
 
 mm = make_pipeline(MinMaxScaler(), Normalizer())
 X_train = mm.fit_transform(X_train)
@@ -80,6 +80,7 @@
 # plot them later.
 
 import time
+
 from sklearn.svm import LinearSVC
 
 results = {}
diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py
index 261fec8aeee3b..79b49fb76ef9a 100644
--- a/examples/linear_model/plot_ard.py
+++ b/examples/linear_model/plot_ard.py
@@ -54,7 +54,8 @@
 # coefficients.
 import pandas as pd
-from sklearn.linear_model import ARDRegression, LinearRegression, BayesianRidge
+
+from sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression
 
 olr = LinearRegression().fit(X, y)
 brr = BayesianRidge(compute_score=True, n_iter=30).fit(X, y)
diff --git a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
index 3bca3101758ff..8313b0b56922e 100644
--- a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
+++ b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
@@ -20,6 +20,7 @@
 # %%
 # Let's start by loading the dataset and creating some sample weights.
 import numpy as np
+
 from sklearn.datasets import make_regression
 
 rng = np.random.RandomState(0)
diff --git a/examples/linear_model/plot_huber_vs_ridge.py b/examples/linear_model/plot_huber_vs_ridge.py
index 2ea5a190e35d8..7c0222b71a721 100644
--- a/examples/linear_model/plot_huber_vs_ridge.py
+++ b/examples/linear_model/plot_huber_vs_ridge.py
@@ -16,8 +16,8 @@
 # Authors: Manoj Kumar mks542@nyu.edu
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.datasets import make_regression
 from sklearn.linear_model import HuberRegressor, Ridge
diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py
index 10a1f0f15ad79..4d420c26977ed 100644
--- a/examples/linear_model/plot_iris_logistic.py
+++ b/examples/linear_model/plot_iris_logistic.py
@@ -16,9 +16,10 @@
 # License: BSD 3 clause
 
 import matplotlib.pyplot as plt
-from sklearn.linear_model import LogisticRegression
+
 from sklearn import datasets
 from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.linear_model import LogisticRegression
 
 # import some data to play with
 iris = datasets.load_iris()
diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py
index c167b0ce785e2..a7804c974fd49 100644
--- a/examples/linear_model/plot_lasso_and_elasticnet.py
+++ b/examples/linear_model/plot_lasso_and_elasticnet.py
@@ -13,8 +13,8 @@
 # Data Generation
 # ---------------------------------------------------
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.metrics import r2_score
 
diff --git a/examples/linear_model/plot_lasso_coordinate_descent_path.py b/examples/linear_model/plot_lasso_coordinate_descent_path.py
index 1796dc5011644..ee2f09f000d23 100644
--- a/examples/linear_model/plot_lasso_coordinate_descent_path.py
+++ b/examples/linear_model/plot_lasso_coordinate_descent_path.py
@@ -14,12 +14,12 @@
 # License: BSD 3 clause
 
 from itertools import cycle
-import numpy as np
+
 import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn.linear_model import lasso_path, enet_path
 from sklearn import datasets
-
+from sklearn.linear_model import enet_path, lasso_path
 
 X, y = datasets.load_diabetes(return_X_y=True)
 
diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
index 8da1820c0b0c4..a797d5d708160 100644
--- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
+++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
@@ -9,13 +9,12 @@
 
 """
 
 from time import time
-from scipy import sparse
-from scipy import linalg
+
+from scipy import linalg, sparse
 
 from sklearn.datasets import make_regression
 from sklearn.linear_model import Lasso
 
-
 # %%
 # Comparing the two Lasso implementations on Dense data
 # -----------------------------------------------------
diff --git a/examples/linear_model/plot_lasso_lars.py b/examples/linear_model/plot_lasso_lars.py
index 6788b8b1d1598..5444aeec90c65 100644
--- a/examples/linear_model/plot_lasso_lars.py
+++ b/examples/linear_model/plot_lasso_lars.py
@@ -14,11 +14,10 @@
 #          Alexandre Gramfort
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn import linear_model
-from sklearn import datasets
+from sklearn import datasets, linear_model
 
 X, y = datasets.load_diabetes(return_X_y=True)
 
diff --git a/examples/linear_model/plot_lasso_lars_ic.py b/examples/linear_model/plot_lasso_lars_ic.py
index 4a09d28cdce9a..dc76f16fb8c82 100644
--- a/examples/linear_model/plot_lasso_lars_ic.py
+++ b/examples/linear_model/plot_lasso_lars_ic.py
@@ -36,6 +36,9 @@
 n_samples = X.shape[0]
 X.head()
 
+from sklearn.linear_model import LassoLarsIC
+from sklearn.pipeline import make_pipeline
+
 # %%
 # Scikit-learn provides an estimator called
 # :class:`~sklearn.linear_model.LinearLarsIC` that uses either Akaike's
@@ -46,8 +49,6 @@
 # In the following, we are going to fit two models to compare the values
 # reported by AIC and BIC.
 from sklearn.preprocessing import StandardScaler
-from sklearn.linear_model import LassoLarsIC
-from sklearn.pipeline import make_pipeline
 
 lasso_lars_ic = make_pipeline(
     StandardScaler(), LassoLarsIC(criterion="aic", normalize=False)
diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py
index bf2111e32b427..f7e9a2c35196f 100644
--- a/examples/linear_model/plot_lasso_model_selection.py
+++ b/examples/linear_model/plot_lasso_model_selection.py
@@ -59,9 +59,10 @@
 #
 # We will first fit a Lasso model with the AIC criterion.
import time -from sklearn.preprocessing import StandardScaler + from sklearn.linear_model import LassoLarsIC from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler start_time = time.time() lasso_lars_ic = make_pipeline( diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py index f0f5dbf710714..e82077771bcf1 100644 --- a/examples/linear_model/plot_logistic.py +++ b/examples/linear_model/plot_logistic.py @@ -13,12 +13,12 @@ # Code source: Gael Varoquaux # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt - -from sklearn.linear_model import LogisticRegression, LinearRegression +import numpy as np from scipy.special import expit +from sklearn.linear_model import LinearRegression, LogisticRegression + # Generate a toy dataset, it's just a straight line with some Gaussian noise: xmin, xmax = -5, 5 n_samples = 100 diff --git a/examples/linear_model/plot_logistic_l1_l2_sparsity.py b/examples/linear_model/plot_logistic_l1_l2_sparsity.py index ce0afef012a2b..d5aa883b7b7b1 100644 --- a/examples/linear_model/plot_logistic_l1_l2_sparsity.py +++ b/examples/linear_model/plot_logistic_l1_l2_sparsity.py @@ -20,11 +20,11 @@ # Andreas Mueller # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.linear_model import LogisticRegression from sklearn import datasets +from sklearn.linear_model import LogisticRegression from sklearn.preprocessing import StandardScaler X, y = datasets.load_digits(return_X_y=True) diff --git a/examples/linear_model/plot_logistic_multinomial.py b/examples/linear_model/plot_logistic_multinomial.py index 814eeadaa68c4..791a788b2238b 100644 --- a/examples/linear_model/plot_logistic_multinomial.py +++ b/examples/linear_model/plot_logistic_multinomial.py @@ -12,11 +12,12 @@ # Authors: Tom Dupre la Tour # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_blobs -from sklearn.linear_model import LogisticRegression from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import LogisticRegression # make 3-class dataset for classification centers = [[-5, 0], [0, 1.5], [5, -1]] diff --git a/examples/linear_model/plot_multi_task_lasso_support.py b/examples/linear_model/plot_multi_task_lasso_support.py index b53c78b986acd..f976698c4a30e 100644 --- a/examples/linear_model/plot_multi_task_lasso_support.py +++ b/examples/linear_model/plot_multi_task_lasso_support.py @@ -19,7 +19,7 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.linear_model import MultiTaskLasso, Lasso +from sklearn.linear_model import Lasso, MultiTaskLasso rng = np.random.RandomState(42) diff --git a/examples/linear_model/plot_nnls.py b/examples/linear_model/plot_nnls.py index c8ba2914d783a..05a8550ec166b 100644 --- a/examples/linear_model/plot_nnls.py +++ b/examples/linear_model/plot_nnls.py @@ -9,8 +9,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.metrics import r2_score # %% diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py index 2d1930eab1b2a..657a0cddd4c7f 100644 --- a/examples/linear_model/plot_ols.py +++ b/examples/linear_model/plot_ols.py @@ -20,6 +20,7 @@ import matplotlib.pyplot as plt import numpy as np + from sklearn import datasets, linear_model from sklearn.metrics import mean_squared_error, r2_score diff --git a/examples/linear_model/plot_ols_3d.py 
b/examples/linear_model/plot_ols_3d.py index 222226c6b28c2..a027eac2ca707 100644 --- a/examples/linear_model/plot_ols_3d.py +++ b/examples/linear_model/plot_ols_3d.py @@ -17,9 +17,10 @@ # %% # First we load the diabetes dataset. -from sklearn import datasets import numpy as np +from sklearn import datasets + X, y = datasets.load_diabetes(return_X_y=True) indices = (0, 1) diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py index 4c668f97786be..78c1ae69dbbca 100644 --- a/examples/linear_model/plot_ols_ridge_variance.py +++ b/examples/linear_model/plot_ols_ridge_variance.py @@ -25,8 +25,8 @@ # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import linear_model diff --git a/examples/linear_model/plot_omp.py b/examples/linear_model/plot_omp.py index 94567409b3841..8bcf1383f2536 100644 --- a/examples/linear_model/plot_omp.py +++ b/examples/linear_model/plot_omp.py @@ -10,9 +10,9 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.linear_model import OrthogonalMatchingPursuit -from sklearn.linear_model import OrthogonalMatchingPursuitCV + from sklearn.datasets import make_sparse_coded_signal +from sklearn.linear_model import OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV n_components, n_features = 512, 100 n_nonzero_coefs = 17 diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 5ef8f56980dea..78fe9211fae08 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -41,10 +41,11 @@ # Olivier Grisel # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd +from sklearn.datasets import fetch_openml ############################################################################## # The French Motor Third-Party Liability Claims dataset @@ -53,8 +54,6 @@ # Let's load the motor claim dataset from OpenML: # https://www.openml.org/d/41214 -from sklearn.datasets import fetch_openml - df = fetch_openml(data_id=41214, as_frame=True, parser="pandas").frame df @@ -97,11 +96,14 @@ # In order to fit linear models with those predictors it is therefore # necessary to perform standard feature transformations as follows: -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import FunctionTransformer, OneHotEncoder -from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.compose import ColumnTransformer - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + FunctionTransformer, + KBinsDiscretizer, + OneHotEncoder, + StandardScaler, +) log_scale_transformer = make_pipeline( FunctionTransformer(np.log, validate=False), StandardScaler() @@ -135,8 +137,8 @@ # the training sample. 
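The hunk above folds the four preprocessing names into a single parenthesized import. As a reminder of what those imports are used for, here is a minimal, self-contained sketch of the kind of preprocessor the example builds (the column names are assumptions for illustration, not taken from the patch):

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
    FunctionTransformer,
    KBinsDiscretizer,
    OneHotEncoder,
    StandardScaler,
)

# log-transform then standardize a strictly positive column
log_scale_transformer = make_pipeline(
    FunctionTransformer(np.log, validate=False), StandardScaler()
)
linear_model_preprocessor = ColumnTransformer(
    [
        ("passthrough_numeric", "passthrough", ["BonusMalus"]),
        ("binned_numeric", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]),
        ("log_scaled_numeric", log_scale_transformer, ["Density"]),
        ("onehot_categorical", OneHotEncoder(), ["VehBrand", "VehGas", "Region"]),
    ],
    remainder="drop",
)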
from sklearn.dummy import DummyRegressor -from sklearn.pipeline import Pipeline from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline df_train, df_test = train_test_split(df, test_size=0.33, random_state=0) @@ -152,9 +154,11 @@ # Let's compute the performance of this constant prediction baseline with 3 # different regression metrics: -from sklearn.metrics import mean_squared_error -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_poisson_deviance +from sklearn.metrics import ( + mean_absolute_error, + mean_poisson_deviance, + mean_squared_error, +) def score_estimator(estimator, df_test): @@ -209,7 +213,6 @@ def score_estimator(estimator, df_test): from sklearn.linear_model import Ridge - ridge_glm = Pipeline( [ ("preprocessor", linear_model_preprocessor), @@ -281,7 +284,6 @@ def score_estimator(estimator, df_test): from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.preprocessing import OrdinalEncoder - tree_preprocessor = ColumnTransformer( [ ( diff --git a/examples/linear_model/plot_polynomial_interpolation.py b/examples/linear_model/plot_polynomial_interpolation.py index ac2fe28de870d..f648b7aea762d 100644 --- a/examples/linear_model/plot_polynomial_interpolation.py +++ b/examples/linear_model/plot_polynomial_interpolation.py @@ -42,13 +42,12 @@ # Malte Londschien # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.linear_model import Ridge -from sklearn.preprocessing import PolynomialFeatures, SplineTransformer from sklearn.pipeline import make_pipeline - +from sklearn.preprocessing import PolynomialFeatures, SplineTransformer # %% # We start by defining a function that we intend to approximate and prepare diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py index b4e1534efc54f..b623e116f5c31 100644 --- a/examples/linear_model/plot_quantile_regression.py +++ b/examples/linear_model/plot_quantile_regression.py @@ -246,8 +246,7 @@ # distributed target to make it more interesting as mean and median are not # equal. 
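The next hunk merges the two metrics imports used to compare a median regressor against least squares. A self-contained sketch of that comparison, with synthetic heavy-tailed data (the data generation is an assumption, not the example's own):

import numpy as np

from sklearn.linear_model import LinearRegression, QuantileRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

rng = np.random.RandomState(42)
X = rng.uniform(0, 10, size=(1000, 1))
# asymmetric, heavy-tailed noise, so the conditional mean and median differ
y = 10 + 0.5 * X.ravel() + rng.pareto(2.6, size=1000)

for name, est in [
    ("least squares", LinearRegression()),
    ("median (pinball loss)", QuantileRegressor(quantile=0.5, alpha=0)),
]:
    pred = est.fit(X, y).predict(X)
    # least squares should win on MSE, the median regressor on MAE
    print(name, mean_absolute_error(y, pred), mean_squared_error(y, pred))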
from sklearn.linear_model import LinearRegression -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_squared_error +from sklearn.metrics import mean_absolute_error, mean_squared_error linear_regression = LinearRegression() quantile_regression = QuantileRegressor(quantile=0.5, alpha=0) diff --git a/examples/linear_model/plot_ransac.py b/examples/linear_model/plot_ransac.py index 81670061a6609..bb3336dc1e364 100644 --- a/examples/linear_model/plot_ransac.py +++ b/examples/linear_model/plot_ransac.py @@ -11,8 +11,7 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn import linear_model, datasets - +from sklearn import datasets, linear_model n_samples = 1000 n_outliers = 50 diff --git a/examples/linear_model/plot_ridge_path.py b/examples/linear_model/plot_ridge_path.py index 66f8fd9eb6c23..01f9d45a63f8d 100644 --- a/examples/linear_model/plot_ridge_path.py +++ b/examples/linear_model/plot_ridge_path.py @@ -30,8 +30,9 @@ # Author: Fabian Pedregosa -- # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import linear_model # X is the 10x10 Hilbert matrix diff --git a/examples/linear_model/plot_robust_fit.py b/examples/linear_model/plot_robust_fit.py index c9fe49fc0d416..79213c9a8e83e 100644 --- a/examples/linear_model/plot_robust_fit.py +++ b/examples/linear_model/plot_robust_fit.py @@ -30,18 +30,18 @@ """ -from matplotlib import pyplot as plt import numpy as np +from matplotlib import pyplot as plt from sklearn.linear_model import ( + HuberRegressor, LinearRegression, - TheilSenRegressor, RANSACRegressor, - HuberRegressor, + TheilSenRegressor, ) from sklearn.metrics import mean_squared_error -from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures np.random.seed(42) diff --git a/examples/linear_model/plot_sgd_comparison.py b/examples/linear_model/plot_sgd_comparison.py index 5ab0d6b1b2827..0477e42cf5947 100644 --- a/examples/linear_model/plot_sgd_comparison.py +++ b/examples/linear_model/plot_sgd_comparison.py @@ -9,14 +9,17 @@ # Author: Rob Zinkov # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt -from sklearn import datasets +import numpy as np +from sklearn import datasets +from sklearn.linear_model import ( + LogisticRegression, + PassiveAggressiveClassifier, + Perceptron, + SGDClassifier, +) from sklearn.model_selection import train_test_split -from sklearn.linear_model import SGDClassifier, Perceptron -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import LogisticRegression heldout = [0.95, 0.90, 0.75, 0.50, 0.01] # Number of rounds to fit and evaluate an estimator. 
diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py index 123180ac62a9b..ceff4ba8ee056 100644 --- a/examples/linear_model/plot_sgd_early_stopping.py +++ b/examples/linear_model/plot_sgd_early_stopping.py @@ -41,19 +41,19 @@ # # License: BSD 3 clause -import time import sys +import time -import pandas as pd -import numpy as np import matplotlib.pyplot as plt +import numpy as np +import pandas as pd from sklearn import linear_model from sklearn.datasets import fetch_openml -from sklearn.model_selection import train_test_split -from sklearn.utils._testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning +from sklearn.model_selection import train_test_split from sklearn.utils import shuffle +from sklearn.utils._testing import ignore_warnings def load_mnist(n_samples=None, class_0="0", class_1="8"): diff --git a/examples/linear_model/plot_sgd_iris.py b/examples/linear_model/plot_sgd_iris.py index 64dca07396d54..5d9b923f9b444 100644 --- a/examples/linear_model/plot_sgd_iris.py +++ b/examples/linear_model/plot_sgd_iris.py @@ -9,11 +9,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets -from sklearn.linear_model import SGDClassifier from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import SGDClassifier # import some data to play with iris = datasets.load_iris() diff --git a/examples/linear_model/plot_sgd_loss_functions.py b/examples/linear_model/plot_sgd_loss_functions.py index a1f74dca4d6af..140562184b946 100644 --- a/examples/linear_model/plot_sgd_loss_functions.py +++ b/examples/linear_model/plot_sgd_loss_functions.py @@ -8,8 +8,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np def modified_huber_loss(y_true, y_pred): diff --git a/examples/linear_model/plot_sgd_penalties.py b/examples/linear_model/plot_sgd_penalties.py index 0413751fb41a9..ff71dba5f20a3 100644 --- a/examples/linear_model/plot_sgd_penalties.py +++ b/examples/linear_model/plot_sgd_penalties.py @@ -11,8 +11,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np l1_color = "navy" l2_color = "c" diff --git a/examples/linear_model/plot_sgd_separating_hyperplane.py b/examples/linear_model/plot_sgd_separating_hyperplane.py index af288fcd3dde0..e84ab7c519ae9 100644 --- a/examples/linear_model/plot_sgd_separating_hyperplane.py +++ b/examples/linear_model/plot_sgd_separating_hyperplane.py @@ -9,10 +9,11 @@ """ -import numpy as np import matplotlib.pyplot as plt -from sklearn.linear_model import SGDClassifier +import numpy as np + from sklearn.datasets import make_blobs +from sklearn.linear_model import SGDClassifier # we create 50 separable points X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) diff --git a/examples/linear_model/plot_sgd_weighted_samples.py b/examples/linear_model/plot_sgd_weighted_samples.py index 2db52042b075f..4d605e99b4e49 100644 --- a/examples/linear_model/plot_sgd_weighted_samples.py +++ b/examples/linear_model/plot_sgd_weighted_samples.py @@ -8,8 +8,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import linear_model # we create 20 points diff --git a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py index c25f4a84d91e0..2f03768f50532 100644 --- a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py +++ b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py @@ -19,13 
+19,14 @@ """ # noqa: E501 -import numpy as np -import matplotlib.pyplot as plt import matplotlib -from sklearn.svm import OneClassSVM -from sklearn.linear_model import SGDOneClassSVM +import matplotlib.pyplot as plt +import numpy as np + from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import SGDOneClassSVM from sklearn.pipeline import make_pipeline +from sklearn.svm import OneClassSVM font = {"weight": "normal", "size": 15} diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py index 507dda5c76901..f62208aab154a 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py +++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py @@ -29,9 +29,9 @@ import numpy as np from sklearn.datasets import fetch_20newsgroups_vectorized +from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split -from sklearn.exceptions import ConvergenceWarning warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn") t0 = timeit.default_timer() diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py index 37327aeaa4cb7..e6746b8fb0896 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py +++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py @@ -21,6 +21,7 @@ # License: BSD 3 clause import time + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_theilsen.py b/examples/linear_model/plot_theilsen.py index b380baf705a76..eb0ac4966841d 100644 --- a/examples/linear_model/plot_theilsen.py +++ b/examples/linear_model/plot_theilsen.py @@ -39,10 +39,11 @@ # License: BSD 3 clause import time -import numpy as np + import matplotlib.pyplot as plt -from sklearn.linear_model import LinearRegression, TheilSenRegressor -from sklearn.linear_model import RANSACRegressor +import numpy as np + +from sklearn.linear_model import LinearRegression, RANSACRegressor, TheilSenRegressor estimators = [ ("OLS", LinearRegression()), diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 3d86903fcdeff..0af020890264e 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -46,14 +46,16 @@ from functools import partial -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd from sklearn.datasets import fetch_openml -from sklearn.metrics import mean_tweedie_deviance -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_squared_error +from sklearn.metrics import ( + mean_absolute_error, + mean_squared_error, + mean_tweedie_deviance, +) def load_mtpl2(n_samples=100000): @@ -201,6 +203,8 @@ def score_estimator( return res +from sklearn.compose import ColumnTransformer + # %% # Loading datasets, basic feature extraction and target definitions # ----------------------------------------------------------------- @@ -210,10 +214,12 @@ def score_estimator( # containing the claim amount (``ClaimAmount``) for the same policy ids # (``IDpol``). 
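Among the imports regrouped above, mean_tweedie_deviance is the metric this example scores its claim models with. A small, hedged usage sketch (the values are made up):

import numpy as np

from sklearn.metrics import mean_tweedie_deviance

y_true = np.array([0.0, 0.0, 120.0, 500.0])  # observed claim amounts, mostly zero
y_pred = np.array([50.0, 80.0, 100.0, 400.0])  # predictions must stay positive
# for 1 < power < 2 (compound Poisson-gamma), zeros in y_true are allowed
for power in (1.5, 1.7, 1.9):
    print(power, mean_tweedie_deviance(y_true, y_pred, power=power))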
from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import FunctionTransformer, OneHotEncoder -from sklearn.preprocessing import StandardScaler, KBinsDiscretizer -from sklearn.compose import ColumnTransformer - +from sklearn.preprocessing import ( + FunctionTransformer, + KBinsDiscretizer, + OneHotEncoder, + StandardScaler, +) df = load_mtpl2(n_samples=60000) @@ -259,6 +265,8 @@ def score_estimator( with pd.option_context("display.max_columns", 15): print(df[df.ClaimAmount > 0].head()) +from sklearn.linear_model import PoissonRegressor + # %% # # Frequency model -- Poisson distribution @@ -271,8 +279,6 @@ def score_estimator( # Here we model the frequency ``y = ClaimNb / Exposure``, which is still a # (scaled) Poisson distribution, and use ``Exposure`` as `sample_weight`. from sklearn.model_selection import train_test_split -from sklearn.linear_model import PoissonRegressor - df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) @@ -370,7 +376,6 @@ def score_estimator( # more than one claim. from sklearn.linear_model import GammaRegressor - mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 @@ -480,7 +485,6 @@ def score_estimator( # regardless of `power`. from sklearn.linear_model import TweedieRegressor - glm_pure_premium = TweedieRegressor(power=1.9, alpha=0.1, max_iter=10000) glm_pure_premium.fit( X_train, df_train["PurePremium"], sample_weight=df_train["Exposure"] diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index 310a476290841..e10331e7adc4c 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -28,14 +28,14 @@ # # We start by generating the S-curve dataset. -from numpy.random import RandomState import matplotlib.pyplot as plt -from matplotlib import ticker # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +from matplotlib import ticker +from numpy.random import RandomState -from sklearn import manifold, datasets +from sklearn import datasets, manifold rng = RandomState(0) diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index bafee1b7a7eb0..99520fb281132 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -45,6 +45,7 @@ # scattered across it. 
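This plot_lle_digits hunk only reorders imports; for orientation, a minimal sketch of one of the embeddings the example compares (the parameters are assumptions, not copied from the file):

from sklearn.datasets import load_digits
from sklearn.manifold import TSNE

X, y = load_digits(n_class=6, return_X_y=True)
# project the 64-dimensional digit vectors onto 2D for plotting
X_2d = TSNE(n_components=2, init="pca", random_state=0).fit_transform(X)
print(X_2d.shape)  # (n_samples, 2), ready for a scatter plot colored by y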
import numpy as np from matplotlib import offsetbox + from sklearn.preprocessing import MinMaxScaler @@ -103,11 +104,11 @@ def plot_embedding(X, title): from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.ensemble import RandomTreesEmbedding from sklearn.manifold import ( + MDS, + TSNE, Isomap, LocallyLinearEmbedding, - MDS, SpectralEmbedding, - TSNE, ) from sklearn.neighbors import NeighborhoodComponentsAnalysis from sklearn.pipeline import make_pipeline diff --git a/examples/manifold/plot_manifold_sphere.py b/examples/manifold/plot_manifold_sphere.py index b68400e0b7e66..dd16ddbf86526 100644 --- a/examples/manifold/plot_manifold_sphere.py +++ b/examples/manifold/plot_manifold_sphere.py @@ -29,16 +29,18 @@ # Author: Jaques Grobler # License: BSD 3 clause +import warnings from time import time -import numpy as np + import matplotlib.pyplot as plt -from matplotlib.ticker import NullFormatter -from sklearn import manifold -from sklearn.utils import check_random_state # Unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 -import warnings +import numpy as np +from matplotlib.ticker import NullFormatter + +from sklearn import manifold +from sklearn.utils import check_random_state # Variables for manifold learning. n_neighbors = 10 diff --git a/examples/manifold/plot_mds.py b/examples/manifold/plot_mds.py index 4a90268eba902..87db0f5ad3a50 100644 --- a/examples/manifold/plot_mds.py +++ b/examples/manifold/plot_mds.py @@ -14,13 +14,12 @@ # License: BSD import numpy as np - from matplotlib import pyplot as plt from matplotlib.collections import LineCollection from sklearn import manifold -from sklearn.metrics import euclidean_distances from sklearn.decomposition import PCA +from sklearn.metrics import euclidean_distances EPSILON = np.finfo(np.float32).eps n_samples = 20 diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py index 4bad9afd43ab6..c4dfa0a0402af 100644 --- a/examples/manifold/plot_swissroll.py +++ b/examples/manifold/plot_swissroll.py @@ -15,8 +15,8 @@ # We start by generating the Swiss Roll dataset. 
import matplotlib.pyplot as plt -from sklearn import manifold, datasets +from sklearn import datasets, manifold sr_points, sr_color = datasets.make_swiss_roll(n_samples=1500, random_state=0) diff --git a/examples/manifold/plot_t_sne_perplexity.py b/examples/manifold/plot_t_sne_perplexity.py index 20fd00a835620..65021c4ea6fcf 100644 --- a/examples/manifold/plot_t_sne_perplexity.py +++ b/examples/manifold/plot_t_sne_perplexity.py @@ -27,12 +27,13 @@ # Author: Narine Kokhlikyan # License: BSD -import numpy as np -import matplotlib.pyplot as plt +from time import time +import matplotlib.pyplot as plt +import numpy as np from matplotlib.ticker import NullFormatter -from sklearn import manifold, datasets -from time import time + +from sklearn import datasets, manifold n_samples = 150 n_components = 2 diff --git a/examples/miscellaneous/plot_anomaly_comparison.py b/examples/miscellaneous/plot_anomaly_comparison.py index efb4f6d86edfc..7d5bc7d59bfa1 100644 --- a/examples/miscellaneous/plot_anomaly_comparison.py +++ b/examples/miscellaneous/plot_anomaly_comparison.py @@ -68,17 +68,17 @@ import time -import numpy as np import matplotlib import matplotlib.pyplot as plt +import numpy as np from sklearn import svm -from sklearn.datasets import make_moons, make_blobs from sklearn.covariance import EllipticEnvelope +from sklearn.datasets import make_blobs, make_moons from sklearn.ensemble import IsolationForest -from sklearn.neighbors import LocalOutlierFactor -from sklearn.linear_model import SGDOneClassSVM from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import SGDOneClassSVM +from sklearn.neighbors import LocalOutlierFactor from sklearn.pipeline import make_pipeline matplotlib.rcParams["contour.negative_linestyle"] = "solid" diff --git a/examples/miscellaneous/plot_changed_only_pprint_parameter.py b/examples/miscellaneous/plot_changed_only_pprint_parameter.py index c213c4b5d98af..aef6d28402a2b 100644 --- a/examples/miscellaneous/plot_changed_only_pprint_parameter.py +++ b/examples/miscellaneous/plot_changed_only_pprint_parameter.py @@ -11,9 +11,8 @@ """ -from sklearn.linear_model import LogisticRegression from sklearn import set_config - +from sklearn.linear_model import LogisticRegression lr = LogisticRegression(penalty="l1") print("Default representation:") diff --git a/examples/miscellaneous/plot_display_object_visualization.py b/examples/miscellaneous/plot_display_object_visualization.py index f108beced7a00..24095de3b5cae 100644 --- a/examples/miscellaneous/plot_display_object_visualization.py +++ b/examples/miscellaneous/plot_display_object_visualization.py @@ -24,10 +24,10 @@ # data is split into a train and test dataset and a logistic regression is # fitted with the train dataset. from sklearn.datasets import fetch_openml -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler X, y = fetch_openml(data_id=1464, return_X_y=True, parser="pandas") X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y) @@ -41,8 +41,7 @@ # With the fitted model, we compute the predictions of the model on the test # dataset. 
These predictions are used to compute the confusion matrix which # is plotted with the :class:`ConfusionMatrixDisplay` -from sklearn.metrics import confusion_matrix -from sklearn.metrics import ConfusionMatrixDisplay +from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix y_pred = clf.predict(X_test) cm = confusion_matrix(y_test, y_pred) @@ -56,8 +55,7 @@ # The roc curve requires either the probabilities or the non-thresholded # decision values from the estimator. Since the logistic regression provides # a decision function, we will use it to plot the roc curve: -from sklearn.metrics import roc_curve -from sklearn.metrics import RocCurveDisplay +from sklearn.metrics import RocCurveDisplay, roc_curve y_score = clf.decision_function(X_test) @@ -69,8 +67,7 @@ ############################################################################## # Similarly, the precision recall curve can be plotted using `y_score` from # the previous sections. -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import PrecisionRecallDisplay +from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=clf.classes_[1]) pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot() diff --git a/examples/miscellaneous/plot_isotonic_regression.py b/examples/miscellaneous/plot_isotonic_regression.py index 0240a8dec34b5..a1c1174c9e9de 100644 --- a/examples/miscellaneous/plot_isotonic_regression.py +++ b/examples/miscellaneous/plot_isotonic_regression.py @@ -23,12 +23,12 @@ # Alexandre Gramfort # License: BSD -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.collections import LineCollection -from sklearn.linear_model import LinearRegression from sklearn.isotonic import IsotonicRegression +from sklearn.linear_model import LinearRegression from sklearn.utils import check_random_state n = 100 diff --git a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py index 6fd9d3614804c..85161a6ee51bb 100644 --- a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py +++ b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py @@ -15,13 +15,16 @@ import sys from time import time -import numpy as np + import matplotlib.pyplot as plt -from sklearn.random_projection import johnson_lindenstrauss_min_dim -from sklearn.random_projection import SparseRandomProjection -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.datasets import load_digits +import numpy as np + +from sklearn.datasets import fetch_20newsgroups_vectorized, load_digits from sklearn.metrics.pairwise import euclidean_distances +from sklearn.random_projection import ( + SparseRandomProjection, + johnson_lindenstrauss_min_dim, +) # %% # Theoretical bounds diff --git a/examples/miscellaneous/plot_kernel_approximation.py b/examples/miscellaneous/plot_kernel_approximation.py index 7dfc1e31220e8..372b8f9a37197 100644 --- a/examples/miscellaneous/plot_kernel_approximation.py +++ b/examples/miscellaneous/plot_kernel_approximation.py @@ -38,15 +38,16 @@ # Andreas Mueller # License: BSD 3 clause +from time import time + # Standard scientific Python imports import matplotlib.pyplot as plt import numpy as np -from time import time # Import datasets, classifiers and performance metrics -from sklearn import datasets, svm, pipeline -from sklearn.kernel_approximation import RBFSampler, Nystroem +from sklearn import datasets,
pipeline, svm from sklearn.decomposition import PCA +from sklearn.kernel_approximation import Nystroem, RBFSampler # The digits dataset digits = datasets.load_digits(n_class=9) diff --git a/examples/miscellaneous/plot_kernel_ridge_regression.py b/examples/miscellaneous/plot_kernel_ridge_regression.py index dd696443d6b31..1882a838e5ef1 100644 --- a/examples/miscellaneous/plot_kernel_ridge_regression.py +++ b/examples/miscellaneous/plot_kernel_ridge_regression.py @@ -40,9 +40,9 @@ # Construct the kernel-based regression models # -------------------------------------------- +from sklearn.kernel_ridge import KernelRidge from sklearn.model_selection import GridSearchCV from sklearn.svm import SVR -from sklearn.kernel_ridge import KernelRidge train_size = 100 diff --git a/examples/miscellaneous/plot_multilabel.py b/examples/miscellaneous/plot_multilabel.py index aded595258fea..b424c3253104a 100644 --- a/examples/miscellaneous/plot_multilabel.py +++ b/examples/miscellaneous/plot_multilabel.py @@ -32,14 +32,14 @@ # Authors: Vlad Niculae, Mathieu Blondel # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from sklearn.cross_decomposition import CCA from sklearn.datasets import make_multilabel_classification +from sklearn.decomposition import PCA from sklearn.multiclass import OneVsRestClassifier from sklearn.svm import SVC -from sklearn.decomposition import PCA -from sklearn.cross_decomposition import CCA def plot_hyperplane(clf, min_x, max_x, linestyle, label): diff --git a/examples/miscellaneous/plot_multioutput_face_completion.py b/examples/miscellaneous/plot_multioutput_face_completion.py index 31e73195747a5..62070bc05e488 100644 --- a/examples/miscellaneous/plot_multioutput_face_completion.py +++ b/examples/miscellaneous/plot_multioutput_face_completion.py @@ -12,16 +12,14 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import fetch_olivetti_faces -from sklearn.utils.validation import check_random_state - from sklearn.ensemble import ExtraTreesRegressor +from sklearn.linear_model import LinearRegression, RidgeCV from sklearn.neighbors import KNeighborsRegressor -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import RidgeCV +from sklearn.utils.validation import check_random_state # Load the faces datasets data, targets = fetch_olivetti_faces(return_X_y=True) diff --git a/examples/miscellaneous/plot_outlier_detection_bench.py b/examples/miscellaneous/plot_outlier_detection_bench.py index f2d0b922710ca..0e2a158b43df0 100644 --- a/examples/miscellaneous/plot_outlier_detection_bench.py +++ b/examples/miscellaneous/plot_outlier_detection_bench.py @@ -32,10 +32,11 @@ # The `preprocess_dataset` function returns data and target. import numpy as np -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml -from sklearn.preprocessing import LabelBinarizer import pandas as pd +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml +from sklearn.preprocessing import LabelBinarizer + rng = np.random.RandomState(42) @@ -118,8 +119,8 @@ def preprocess_dataset(dataset_name): # `compute_prediction` function returns average outlier score of X. 
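The next hunk only reorders the imports around `compute_prediction`; its body sits outside the diff context. A hedged sketch of what such a scorer can look like (the model names and parameters are assumptions):

from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor


def compute_prediction(X, model_name):
    if model_name == "LOF":
        clf = LocalOutlierFactor(n_neighbors=20)
        clf.fit(X)
        return clf.negative_outlier_factor_  # higher = more inlier-like
    if model_name == "IForest":
        clf = IsolationForest(random_state=42).fit(X)
        return clf.decision_function(X)  # higher = more inlier-like
    raise ValueError(model_name)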
-from sklearn.neighbors import LocalOutlierFactor from sklearn.ensemble import IsolationForest +from sklearn.neighbors import LocalOutlierFactor def compute_prediction(X, model_name): @@ -147,7 +148,9 @@ def compute_prediction(X, model_name): import math + import matplotlib.pyplot as plt + from sklearn.metrics import RocCurveDisplay datasets_name = [ diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py index 604ead891877c..19873ad7e2af2 100644 --- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py +++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py @@ -14,15 +14,15 @@ """ # noqa: E501 -import pandas as pd import matplotlib.pyplot as plt +import pandas as pd + from sklearn.datasets import load_diabetes +from sklearn.inspection import PartialDependenceDisplay from sklearn.neural_network import MLPRegressor -from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler from sklearn.tree import DecisionTreeRegressor -from sklearn.inspection import PartialDependenceDisplay - # %% # Train models on the diabetes dataset diff --git a/examples/miscellaneous/plot_pipeline_display.py b/examples/miscellaneous/plot_pipeline_display.py index f0fea8d2f3a27..9642bb56b903f 100755 --- a/examples/miscellaneous/plot_pipeline_display.py +++ b/examples/miscellaneous/plot_pipeline_display.py @@ -19,10 +19,10 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. +from sklearn import set_config +from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.linear_model import LogisticRegression -from sklearn import set_config steps = [ ("preprocessing", StandardScaler()), @@ -53,9 +53,9 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler, PolynomialFeatures from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import PolynomialFeatures, StandardScaler steps = [ ("standard_scaler", StandardScaler()), @@ -73,9 +73,9 @@ # a classifier, :class:`~sklearn.svm.SVC`, and displays its visual # representation. +from sklearn.decomposition import PCA from sklearn.pipeline import Pipeline from sklearn.svm import SVC -from sklearn.decomposition import PCA steps = [("reduce_dim", PCA(n_components=4)), ("classifier", SVC(kernel="linear"))] pipe = Pipeline(steps) @@ -90,12 +90,12 @@ # representation. import numpy as np -from sklearn.pipeline import make_pipeline -from sklearn.pipeline import Pipeline -from sklearn.impute import SimpleImputer + from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler numeric_preprocessor = Pipeline( steps=[ @@ -133,13 +133,13 @@ # representation. 
import numpy as np -from sklearn.pipeline import make_pipeline -from sklearn.pipeline import Pipeline -from sklearn.impute import SimpleImputer + from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder, StandardScaler from sklearn.ensemble import RandomForestClassifier +from sklearn.impute import SimpleImputer from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler numeric_preprocessor = Pipeline( steps=[ diff --git a/examples/miscellaneous/plot_roc_curve_visualization_api.py b/examples/miscellaneous/plot_roc_curve_visualization_api.py index b4e08493c77d4..7fc8df9724337 100644 --- a/examples/miscellaneous/plot_roc_curve_visualization_api.py +++ b/examples/miscellaneous/plot_roc_curve_visualization_api.py @@ -15,11 +15,12 @@ # First, we load the wine dataset and convert it to a binary classification # problem. Then, we train a support vector classifier on a training dataset. import matplotlib.pyplot as plt -from sklearn.svm import SVC + +from sklearn.datasets import load_wine from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import RocCurveDisplay -from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split +from sklearn.svm import SVC X, y = load_wine(return_X_y=True) y = y == 2 diff --git a/examples/mixture/plot_concentration_prior.py b/examples/mixture/plot_concentration_prior.py index b143cfed10318..f32e08ac6a26d 100644 --- a/examples/mixture/plot_concentration_prior.py +++ b/examples/mixture/plot_concentration_prior.py @@ -32,10 +32,10 @@ # Author: Thierry Guillemot # License: BSD 3 clause -import numpy as np import matplotlib as mpl -import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec +import matplotlib.pyplot as plt +import numpy as np from sklearn.mixture import BayesianGaussianMixture diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py index 675aa341696ac..d9481cc78758d 100644 --- a/examples/mixture/plot_gmm.py +++ b/examples/mixture/plot_gmm.py @@ -26,10 +26,10 @@ import itertools +import matplotlib as mpl +import matplotlib.pyplot as plt import numpy as np from scipy import linalg -import matplotlib.pyplot as plt -import matplotlib as mpl from sklearn import mixture diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index 95b5d2c1ba90f..cd12324f00c08 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -33,7 +33,6 @@ import matplotlib as mpl import matplotlib.pyplot as plt - import numpy as np from sklearn import datasets diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py index 23a4788b799b4..df9be5189a996 100644 --- a/examples/mixture/plot_gmm_init.py +++ b/examples/mixture/plot_gmm_init.py @@ -37,12 +37,14 @@ # Author: Gordon Walsh # Data generation code from Jake Vanderplas +from timeit import default_timer as timer + import matplotlib.pyplot as plt import numpy as np + +from sklearn.datasets._samples_generator import make_blobs from sklearn.mixture import GaussianMixture from sklearn.utils.extmath import row_norms -from sklearn.datasets._samples_generator import make_blobs -from timeit import default_timer as timer print(__doc__) diff --git a/examples/mixture/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py index 70d58f22f8f41..062bdfd4d6d67 100644 --- a/examples/mixture/plot_gmm_pdf.py +++ 
b/examples/mixture/plot_gmm_pdf.py @@ -9,9 +9,10 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import LogNorm + from sklearn import mixture n_samples = 300 diff --git a/examples/mixture/plot_gmm_selection.py b/examples/mixture/plot_gmm_selection.py index 82175091ee049..acfbaf7ed04ff 100644 --- a/examples/mixture/plot_gmm_selection.py +++ b/examples/mixture/plot_gmm_selection.py @@ -16,12 +16,12 @@ """ -import numpy as np import itertools -from scipy import linalg -import matplotlib.pyplot as plt import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +from scipy import linalg from sklearn import mixture diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py index 76f0d30e4e9d8..3d663e195ceb5 100644 --- a/examples/mixture/plot_gmm_sin.py +++ b/examples/mixture/plot_gmm_sin.py @@ -41,10 +41,10 @@ import itertools +import matplotlib as mpl +import matplotlib.pyplot as plt import numpy as np from scipy import linalg -import matplotlib.pyplot as plt -import matplotlib as mpl from sklearn import mixture diff --git a/examples/model_selection/grid_search_text_feature_extraction.py b/examples/model_selection/grid_search_text_feature_extraction.py index 91801b361265b..2440c7dce5427 100644 --- a/examples/model_selection/grid_search_text_feature_extraction.py +++ b/examples/model_selection/grid_search_text_feature_extraction.py @@ -50,13 +50,12 @@ # Data loading # ------------ +import logging from pprint import pprint from time import time -import logging from sklearn.datasets import fetch_20newsgroups -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_extraction.text import TfidfTransformer +from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from sklearn.linear_model import SGDClassifier from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index b891564db4025..278083a994e58 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -24,12 +24,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import svm, datasets -from sklearn.model_selection import train_test_split +from sklearn import datasets, svm from sklearn.metrics import ConfusionMatrixDisplay +from sklearn.model_selection import train_test_split # import some data to play with iris = datasets.load_iris() diff --git a/examples/model_selection/plot_cv_indices.py b/examples/model_selection/plot_cv_indices.py index 8b70191e4abd1..e6c3580c787f0 100644 --- a/examples/model_selection/plot_cv_indices.py +++ b/examples/model_selection/plot_cv_indices.py @@ -12,19 +12,20 @@ """ +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.patches import Patch + from sklearn.model_selection import ( - TimeSeriesSplit, + GroupKFold, + GroupShuffleSplit, KFold, ShuffleSplit, + StratifiedGroupKFold, StratifiedKFold, - GroupShuffleSplit, - GroupKFold, StratifiedShuffleSplit, - StratifiedGroupKFold, + TimeSeriesSplit, ) -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.patches import Patch rng = np.random.RandomState(1338) cmap_data = plt.cm.Paired diff --git a/examples/model_selection/plot_cv_predict.py b/examples/model_selection/plot_cv_predict.py index 82ef0b8b81ae6..ca0726c30d534 100644 --- 
a/examples/model_selection/plot_cv_predict.py +++ b/examples/model_selection/plot_cv_predict.py @@ -9,11 +9,11 @@ """ -from sklearn import datasets -from sklearn.model_selection import cross_val_predict -from sklearn import linear_model import matplotlib.pyplot as plt +from sklearn import datasets, linear_model +from sklearn.model_selection import cross_val_predict + lr = linear_model.LinearRegression() X, y = datasets.load_diabetes(return_X_y=True) diff --git a/examples/model_selection/plot_grid_search_digits.py b/examples/model_selection/plot_grid_search_digits.py index 2aaa64043749b..77a2b5c92de33 100644 --- a/examples/model_selection/plot_grid_search_digits.py +++ b/examples/model_selection/plot_grid_search_digits.py @@ -17,9 +17,8 @@ """ from sklearn import datasets -from sklearn.model_selection import train_test_split -from sklearn.model_selection import GridSearchCV from sklearn.metrics import classification_report +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.svm import SVC # Loading the Digits dataset diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py index 53513aa4ba1ec..a851ee5f9bb19 100644 --- a/examples/model_selection/plot_grid_search_refit_callable.py +++ b/examples/model_selection/plot_grid_search_refit_callable.py @@ -20,8 +20,8 @@ # Author: Wenhao Zhang -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import load_digits from sklearn.decomposition import PCA diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py index 179d860b42128..fbeb485d8db44 100644 --- a/examples/model_selection/plot_grid_search_stats.py +++ b/examples/model_selection/plot_grid_search_stats.py @@ -16,6 +16,7 @@ import matplotlib.pyplot as plt import seaborn as sns + from sklearn.datasets import make_moons X, y = make_moons(noise=0.352, random_state=1, n_samples=100) diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index 25f43d8b8a3e4..dab4bf04dca55 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -17,13 +17,13 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + +from sklearn.datasets import load_digits +from sklearn.model_selection import ShuffleSplit, learning_curve from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC -from sklearn.datasets import load_digits -from sklearn.model_selection import learning_curve -from sklearn.model_selection import ShuffleSplit def plot_learning_curve( diff --git a/examples/model_selection/plot_multi_metric_evaluation.py b/examples/model_selection/plot_multi_metric_evaluation.py index e47e67e086ccb..674bf8bc1b07c 100644 --- a/examples/model_selection/plot_multi_metric_evaluation.py +++ b/examples/model_selection/plot_multi_metric_evaluation.py @@ -23,9 +23,8 @@ from matplotlib import pyplot as plt from sklearn.datasets import make_hastie_10_2 +from sklearn.metrics import accuracy_score, make_scorer from sklearn.model_selection import GridSearchCV -from sklearn.metrics import make_scorer -from sklearn.metrics import accuracy_score from sklearn.tree import DecisionTreeClassifier # %% diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py index 81d89d93afe91..48f70dab43cf8 100644 --- 
a/examples/model_selection/plot_nested_cross_validation_iris.py +++ b/examples/model_selection/plot_nested_cross_validation_iris.py @@ -44,11 +44,12 @@ """ -from sklearn.datasets import load_iris +import numpy as np from matplotlib import pyplot as plt + +from sklearn.datasets import load_iris +from sklearn.model_selection import GridSearchCV, KFold, cross_val_score from sklearn.svm import SVC -from sklearn.model_selection import GridSearchCV, cross_val_score, KFold -import numpy as np # Number of random trials NUM_TRIALS = 30 diff --git a/examples/model_selection/plot_permutation_tests_for_classification.py b/examples/model_selection/plot_permutation_tests_for_classification.py index 23e3688c437f5..f81115c64ec0d 100644 --- a/examples/model_selection/plot_permutation_tests_for_classification.py +++ b/examples/model_selection/plot_permutation_tests_for_classification.py @@ -58,9 +58,8 @@ # the percentage of permutations for which the score obtained is greater # that the score obtained using the original data. +from sklearn.model_selection import StratifiedKFold, permutation_test_score from sklearn.svm import SVC -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import permutation_test_score clf = SVC(kernel="linear", random_state=7) cv = StratifiedKFold(2, shuffle=True, random_state=0) diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py index 4d9ebcdc4abe2..5e523b0958e42 100644 --- a/examples/model_selection/plot_precision_recall.py +++ b/examples/model_selection/plot_precision_recall.py @@ -100,6 +100,7 @@ # # We will use a Linear SVC classifier to differentiate two types of irises. import numpy as np + from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split @@ -194,8 +195,7 @@ # %% # The average precision score in multi-label settings # ................................................... -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import average_precision_score +from sklearn.metrics import average_precision_score, precision_recall_curve # For each class precision = dict() @@ -222,11 +222,12 @@ display.plot() _ = display.ax_.set_title("Micro-averaged over all classes") +from itertools import cycle + # %% # Plot Precision-Recall curve for each class and iso-f1 curves # ............................................................ 
import matplotlib.pyplot as plt -from itertools import cycle # setup plot details colors = cycle(["navy", "turquoise", "darkorange", "cornflowerblue", "teal"]) diff --git a/examples/model_selection/plot_randomized_search.py b/examples/model_selection/plot_randomized_search.py index d5514a9b1c278..aaf885b3ad63a 100644 --- a/examples/model_selection/plot_randomized_search.py +++ b/examples/model_selection/plot_randomized_search.py @@ -20,15 +20,15 @@ """ -import numpy as np - from time import time + +import numpy as np import scipy.stats as stats -from sklearn.utils.fixes import loguniform -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.datasets import load_digits from sklearn.linear_model import SGDClassifier +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV +from sklearn.utils.fixes import loguniform # get some data X, y = load_digits(return_X_y=True, n_class=3) diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 70657d7d9e068..0f2d366c166ab 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -33,16 +33,16 @@ """ -import numpy as np -import matplotlib.pyplot as plt from itertools import cycle -from sklearn import svm, datasets -from sklearn.metrics import roc_curve, auc +import matplotlib.pyplot as plt +import numpy as np + +from sklearn import datasets, svm +from sklearn.metrics import auc, roc_auc_score, roc_curve from sklearn.model_selection import train_test_split -from sklearn.preprocessing import label_binarize from sklearn.multiclass import OneVsRestClassifier -from sklearn.metrics import roc_auc_score +from sklearn.preprocessing import label_binarize # Import some data to play with iris = datasets.load_iris() diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index 791f9167f3333..b87550846672c 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -54,8 +54,7 @@ import matplotlib.pyplot as plt from sklearn import svm -from sklearn.metrics import auc -from sklearn.metrics import RocCurveDisplay +from sklearn.metrics import RocCurveDisplay, auc from sklearn.model_selection import StratifiedKFold # Run classifier with cross-validation and plot ROC curves diff --git a/examples/model_selection/plot_successive_halving_heatmap.py b/examples/model_selection/plot_successive_halving_heatmap.py index c7104f6d7144b..ba808f5d893bc 100644 --- a/examples/model_selection/plot_successive_halving_heatmap.py +++ b/examples/model_selection/plot_successive_halving_heatmap.py @@ -14,12 +14,10 @@ import numpy as np import pandas as pd -from sklearn.svm import SVC from sklearn import datasets -from sklearn.model_selection import GridSearchCV from sklearn.experimental import enable_halving_search_cv # noqa -from sklearn.model_selection import HalvingGridSearchCV - +from sklearn.model_selection import GridSearchCV, HalvingGridSearchCV +from sklearn.svm import SVC # %% # We first define the parameter space for an :class:`~sklearn.svm.SVC` diff --git a/examples/model_selection/plot_successive_halving_iterations.py b/examples/model_selection/plot_successive_halving_iterations.py index bd2d5635e376e..31805d308e269 100644 --- a/examples/model_selection/plot_successive_halving_iterations.py +++ b/examples/model_selection/plot_successive_halving_iterations.py @@ -10,16 +10,15 @@ """ -import pandas as pd -from sklearn import datasets import matplotlib.pyplot as 
plt -from scipy.stats import randint import numpy as np +import pandas as pd +from scipy.stats import randint +from sklearn import datasets +from sklearn.ensemble import RandomForestClassifier from sklearn.experimental import enable_halving_search_cv # noqa from sklearn.model_selection import HalvingRandomSearchCV -from sklearn.ensemble import RandomForestClassifier - # %% # We first define the parameter space and train a diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 528d3482be15b..2fce0c20688c2 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -19,6 +19,7 @@ # Generate sample data # -------------------- import numpy as np + from sklearn import linear_model from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split diff --git a/examples/model_selection/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py index ae8450b50cea9..412946fc9ca8b 100644 --- a/examples/model_selection/plot_underfitting_overfitting.py +++ b/examples/model_selection/plot_underfitting_overfitting.py @@ -21,12 +21,13 @@ """ -import numpy as np import matplotlib.pyplot as plt -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import PolynomialFeatures +import numpy as np + from sklearn.linear_model import LinearRegression from sklearn.model_selection import cross_val_score +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import PolynomialFeatures def true_fun(X): diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index 1b3c562594188..0b6e65d186d6a 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -17,8 +17,8 @@ import numpy as np from sklearn.datasets import load_digits -from sklearn.svm import SVC from sklearn.model_selection import validation_curve +from sklearn.svm import SVC X, y = load_digits(return_X_y=True) subset_mask = np.isin(y, [1, 2]) # binary classification: 1 vs 2 diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py index e1f9feed43a97..1df4ee3b8346b 100644 --- a/examples/multioutput/plot_classifier_chain_yeast.py +++ b/examples/multioutput/plot_classifier_chain_yeast.py @@ -36,14 +36,15 @@ # Author: Adam Kleczewski # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import fetch_openml -from sklearn.multioutput import ClassifierChain +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import jaccard_score from sklearn.model_selection import train_test_split from sklearn.multiclass import OneVsRestClassifier -from sklearn.metrics import jaccard_score -from sklearn.linear_model import LogisticRegression +from sklearn.multioutput import ClassifierChain # Load a multi-label dataset from https://www.openml.org/d/40597 X, Y = fetch_openml("yeast", version=4, return_X_y=True, parser="pandas") diff --git a/examples/neighbors/approximate_nearest_neighbors.py b/examples/neighbors/approximate_nearest_neighbors.py index 479e324cd6aa4..e3968149d8473 100644 --- a/examples/neighbors/approximate_nearest_neighbors.py +++ b/examples/neighbors/approximate_nearest_neighbors.py @@ -43,11 +43,12 @@ """ +import sys + # Author: Tom Dupre la Tour # # 
License: BSD 3 clause import time -import sys try: import annoy @@ -61,18 +62,18 @@ print("The package 'nmslib' is required to run this example.") sys.exit() -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.ticker import NullFormatter from scipy.sparse import csr_matrix from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.neighbors import KNeighborsTransformer -from sklearn.utils._testing import assert_array_almost_equal from sklearn.datasets import fetch_openml -from sklearn.pipeline import make_pipeline from sklearn.manifold import TSNE +from sklearn.neighbors import KNeighborsTransformer +from sklearn.pipeline import make_pipeline from sklearn.utils import shuffle +from sklearn.utils._testing import assert_array_almost_equal class NMSlibTransformer(TransformerMixin, BaseEstimator): diff --git a/examples/neighbors/plot_caching_nearest_neighbors.py b/examples/neighbors/plot_caching_nearest_neighbors.py index 00be6470c1591..10c0d315da7af 100644 --- a/examples/neighbors/plot_caching_nearest_neighbors.py +++ b/examples/neighbors/plot_caching_nearest_neighbors.py @@ -22,11 +22,12 @@ # # License: BSD 3 clause from tempfile import TemporaryDirectory + import matplotlib.pyplot as plt -from sklearn.neighbors import KNeighborsTransformer, KNeighborsClassifier -from sklearn.model_selection import GridSearchCV from sklearn.datasets import load_digits +from sklearn.model_selection import GridSearchCV +from sklearn.neighbors import KNeighborsClassifier, KNeighborsTransformer from sklearn.pipeline import Pipeline X, y = load_digits(return_X_y=True) diff --git a/examples/neighbors/plot_classification.py b/examples/neighbors/plot_classification.py index cc4f0864ba926..4ed23862ae455 100644 --- a/examples/neighbors/plot_classification.py +++ b/examples/neighbors/plot_classification.py @@ -11,7 +11,8 @@ import matplotlib.pyplot as plt import seaborn as sns from matplotlib.colors import ListedColormap -from sklearn import neighbors, datasets + +from sklearn import datasets, neighbors from sklearn.inspection import DecisionBoundaryDisplay n_neighbors = 15 diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py index e580f9fa178bc..045058eab09cc 100644 --- a/examples/neighbors/plot_digits_kde_sampling.py +++ b/examples/neighbors/plot_digits_kde_sampling.py @@ -11,13 +11,13 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import load_digits -from sklearn.neighbors import KernelDensity from sklearn.decomposition import PCA from sklearn.model_selection import GridSearchCV +from sklearn.neighbors import KernelDensity # load the data digits = load_digits() diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py index 8b139d4cc2335..7c9b6d618b57a 100644 --- a/examples/neighbors/plot_kde_1d.py +++ b/examples/neighbors/plot_kde_1d.py @@ -28,11 +28,13 @@ """ +import matplotlib.pyplot as plt + # Author: Jake Vanderplas # import numpy as np -import matplotlib.pyplot as plt from scipy.stats import norm + from sklearn.neighbors import KernelDensity # ---------------------------------------------------------------------- diff --git a/examples/neighbors/plot_lof_novelty_detection.py b/examples/neighbors/plot_lof_novelty_detection.py index 277134cc77673..91e40661c6dfe 100644 --- a/examples/neighbors/plot_lof_novelty_detection.py +++ b/examples/neighbors/plot_lof_novelty_detection.py @@ -25,9 +25,10 @@ """ -import numpy as np import 
matplotlib import matplotlib.pyplot as plt +import numpy as np + from sklearn.neighbors import LocalOutlierFactor np.random.seed(42) diff --git a/examples/neighbors/plot_lof_outlier_detection.py b/examples/neighbors/plot_lof_outlier_detection.py index 1512173965889..a01a06eededb7 100644 --- a/examples/neighbors/plot_lof_outlier_detection.py +++ b/examples/neighbors/plot_lof_outlier_detection.py @@ -24,8 +24,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.neighbors import LocalOutlierFactor np.random.seed(42) diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py index 17e6a667fcb3b..5dcca91fd94ed 100644 --- a/examples/neighbors/plot_nca_classification.py +++ b/examples/neighbors/plot_nca_classification.py @@ -19,13 +19,13 @@ import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap + from sklearn import datasets +from sklearn.inspection import DecisionBoundaryDisplay from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis from sklearn.pipeline import Pipeline -from sklearn.inspection import DecisionBoundaryDisplay - +from sklearn.preprocessing import StandardScaler n_neighbors = 1 diff --git a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py index d245e0223ccfa..82fd35616929e 100644 --- a/examples/neighbors/plot_nca_dim_reduction.py +++ b/examples/neighbors/plot_nca_dim_reduction.py @@ -30,12 +30,13 @@ # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets -from sklearn.model_selection import train_test_split from sklearn.decomposition import PCA from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py index d722ffa5be033..e5fd2f9cb67bd 100644 --- a/examples/neighbors/plot_nca_illustration.py +++ b/examples/neighbors/plot_nca_illustration.py @@ -12,13 +12,14 @@ # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt -from sklearn.datasets import make_classification -from sklearn.neighbors import NeighborhoodComponentsAnalysis +import numpy as np from matplotlib import cm from scipy.special import logsumexp +from sklearn.datasets import make_classification +from sklearn.neighbors import NeighborhoodComponentsAnalysis + # %% # Original points # --------------- diff --git a/examples/neighbors/plot_nearest_centroid.py b/examples/neighbors/plot_nearest_centroid.py index 0ea3c0c6b1209..5d299b5353e9d 100644 --- a/examples/neighbors/plot_nearest_centroid.py +++ b/examples/neighbors/plot_nearest_centroid.py @@ -8,12 +8,13 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import ListedColormap + from sklearn import datasets -from sklearn.neighbors import NearestCentroid from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.neighbors import NearestCentroid n_neighbors = 15 diff --git a/examples/neighbors/plot_regression.py b/examples/neighbors/plot_regression.py index 78b850d1a4e2c..1e52af3a5c743 100644 --- 
+++ b/examples/neighbors/plot_regression.py
@@ -15,11 +15,13 @@
 
 # License: BSD 3 clause (C) INRIA
 
+import matplotlib.pyplot as plt
+
 # %%
 # Generate sample data
 # --------------------
 import numpy as np
-import matplotlib.pyplot as plt
+
 from sklearn import neighbors
 
 np.random.seed(0)
diff --git a/examples/neighbors/plot_species_kde.py b/examples/neighbors/plot_species_kde.py
index c409d354ec986..6007ebd58cb7b 100644
--- a/examples/neighbors/plot_species_kde.py
+++ b/examples/neighbors/plot_species_kde.py
@@ -40,8 +40,9 @@
 #
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import fetch_species_distributions
 from sklearn.neighbors import KernelDensity
 
diff --git a/examples/neural_networks/plot_mlp_alpha.py b/examples/neural_networks/plot_mlp_alpha.py
index 443d41f4707bf..b53beef54c115 100644
--- a/examples/neural_networks/plot_mlp_alpha.py
+++ b/examples/neural_networks/plot_mlp_alpha.py
@@ -23,11 +23,12 @@
 import numpy as np
 from matplotlib import pyplot as plt
 from matplotlib.colors import ListedColormap
+
+from sklearn.datasets import make_circles, make_classification, make_moons
 from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from sklearn.datasets import make_moons, make_circles, make_classification
 from sklearn.neural_network import MLPClassifier
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
 
 h = 0.02  # step size in the mesh
diff --git a/examples/neural_networks/plot_mlp_training_curves.py b/examples/neural_networks/plot_mlp_training_curves.py
index 3fbddda879162..a9f03c2599a8e 100644
--- a/examples/neural_networks/plot_mlp_training_curves.py
+++ b/examples/neural_networks/plot_mlp_training_curves.py
@@ -18,10 +18,10 @@
 
 import matplotlib.pyplot as plt
 
-from sklearn.neural_network import MLPClassifier
-from sklearn.preprocessing import MinMaxScaler
 from sklearn import datasets
 from sklearn.exceptions import ConvergenceWarning
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import MinMaxScaler
 
 # different learning rate schedules and momentum parameters
 params = [
diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py
index 03f615786e830..43e6a171fb696 100644
--- a/examples/neural_networks/plot_mnist_filters.py
+++ b/examples/neural_networks/plot_mnist_filters.py
@@ -25,11 +25,13 @@
 
 """
 import warnings
+
 import matplotlib.pyplot as plt
+
 from sklearn.datasets import fetch_openml
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.neural_network import MLPClassifier
 from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPClassifier
 
 # Load data from https://www.openml.org/d/554
 X, y = fetch_openml(
diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py
index de939922d9514..3ba878d4ad191 100644
--- a/examples/neural_networks/plot_rbm_logistic_classification.py
+++ b/examples/neural_networks/plot_rbm_logistic_classification.py
@@ -23,13 +23,11 @@
 # linear shifts of 1 pixel in each direction.
 
 import numpy as np
-
 from scipy.ndimage import convolve
+
 from sklearn import datasets
-from sklearn.preprocessing import minmax_scale
-
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import minmax_scale
 
 
 def nudge_dataset(X, Y):
diff --git a/examples/preprocessing/plot_all_scaling.py b/examples/preprocessing/plot_all_scaling.py
index 49af744011d12..b02d98a0a4e77 100644
--- a/examples/preprocessing/plot_all_scaling.py
+++ b/examples/preprocessing/plot_all_scaling.py
@@ -46,22 +46,22 @@
 # Thomas Unterthiner
 # License: BSD 3 clause
 
-import numpy as np
-
 import matplotlib as mpl
-from matplotlib import pyplot as plt
+import numpy as np
 from matplotlib import cm
-
-from sklearn.preprocessing import MinMaxScaler
-from sklearn.preprocessing import minmax_scale
-from sklearn.preprocessing import MaxAbsScaler
-from sklearn.preprocessing import StandardScaler
-from sklearn.preprocessing import RobustScaler
-from sklearn.preprocessing import Normalizer
-from sklearn.preprocessing import QuantileTransformer
-from sklearn.preprocessing import PowerTransformer
+from matplotlib import pyplot as plt
 
 from sklearn.datasets import fetch_california_housing
+from sklearn.preprocessing import (
+    MaxAbsScaler,
+    MinMaxScaler,
+    Normalizer,
+    PowerTransformer,
+    QuantileTransformer,
+    RobustScaler,
+    StandardScaler,
+    minmax_scale,
+)
 
 dataset = fetch_california_housing()
 X_full, y_full = dataset.data, dataset.target
diff --git a/examples/preprocessing/plot_discretization.py b/examples/preprocessing/plot_discretization.py
index d064ea705903b..3806eb26fd68b 100644
--- a/examples/preprocessing/plot_discretization.py
+++ b/examples/preprocessing/plot_discretization.py
@@ -32,8 +32,8 @@
 # Hanmin Qin
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
 from sklearn.linear_model import LinearRegression
 from sklearn.preprocessing import KBinsDiscretizer
diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py
index ff3d2973caff3..acff3f6b2089d 100644
--- a/examples/preprocessing/plot_discretization_classification.py
+++ b/examples/preprocessing/plot_discretization_classification.py
@@ -34,20 +34,19 @@
 #
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib.colors import ListedColormap
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from sklearn.datasets import make_moons, make_circles, make_classification
+
+from sklearn.datasets import make_circles, make_classification, make_moons
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import GridSearchCV, train_test_split
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import KBinsDiscretizer
+from sklearn.preprocessing import KBinsDiscretizer, StandardScaler
 from sklearn.svm import SVC, LinearSVC
-from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.utils._testing import ignore_warnings
-from sklearn.exceptions import ConvergenceWarning
 
 h = 0.02  # step size in the mesh
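The plot_all_scaling.py hunk above also shows how isort merges repeated "from sklearn.preprocessing import ..." lines into a single statement and, once the merged line exceeds black's 88-character limit, wraps it in a parenthesized block with one name per line. A small sketch of that merging, using the real isort.code API on an invented input:

    import isort

    repeated = "".join(
        f"from sklearn.preprocessing import {name}\n"
        for name in ["MinMaxScaler", "minmax_scale", "MaxAbsScaler",
                     "StandardScaler", "RobustScaler", "Normalizer",
                     "QuantileTransformer", "PowerTransformer"]
    )
    # Duplicate "from" imports of one module collapse into a single import;
    # the merged line is longer than 88 characters, so isort emits the same
    # parenthesized, one-name-per-line form seen in the hunk above.
    print(isort.code(repeated, profile="black"))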
diff --git a/examples/preprocessing/plot_discretization_strategies.py b/examples/preprocessing/plot_discretization_strategies.py
index d6b9cb16ee53c..47342df2f221e 100644
--- a/examples/preprocessing/plot_discretization_strategies.py
+++ b/examples/preprocessing/plot_discretization_strategies.py
@@ -20,11 +20,11 @@
 # Author: Tom Dupré la Tour
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn.preprocessing import KBinsDiscretizer
 from sklearn.datasets import make_blobs
+from sklearn.preprocessing import KBinsDiscretizer
 
 strategies = ["uniform", "quantile", "kmeans"]
diff --git a/examples/preprocessing/plot_map_data_to_normal.py b/examples/preprocessing/plot_map_data_to_normal.py
index 42a61d84fa384..a521039098871 100644
--- a/examples/preprocessing/plot_map_data_to_normal.py
+++ b/examples/preprocessing/plot_map_data_to_normal.py
@@ -38,13 +38,11 @@
 # Nicolas Hug
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn.preprocessing import PowerTransformer
-from sklearn.preprocessing import QuantileTransformer
 from sklearn.model_selection import train_test_split
-
+from sklearn.preprocessing import PowerTransformer, QuantileTransformer
 
 N_SAMPLES = 1000
 FONT_SIZE = 6
diff --git a/examples/preprocessing/plot_scaling_importance.py b/examples/preprocessing/plot_scaling_importance.py
index 8ba1263b07d10..fba668b249084 100644
--- a/examples/preprocessing/plot_scaling_importance.py
+++ b/examples/preprocessing/plot_scaling_importance.py
@@ -43,13 +43,13 @@
 
 """
 import matplotlib.pyplot as plt
 
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
+from sklearn.datasets import load_wine
 from sklearn.decomposition import PCA
-from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import accuracy_score
-from sklearn.datasets import load_wine
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
 
 # Code source: Tyler Lanigan
 #              Sebastian Raschka
diff --git a/examples/release_highlights/plot_release_highlights_0_22_0.py b/examples/release_highlights/plot_release_highlights_0_22_0.py
index 57f1527b7fddb..3d6846a32f97f 100644
--- a/examples/release_highlights/plot_release_highlights_0_22_0.py
+++ b/examples/release_highlights/plot_release_highlights_0_22_0.py
@@ -34,12 +34,13 @@
 # :class:`~metrics.plot_confusion_matrix`. Read more about this new API in the
 # :ref:`User Guide `.
 
+import matplotlib.pyplot as plt
+
+from sklearn.datasets import make_classification
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import plot_roc_curve
 from sklearn.model_selection import train_test_split
 from sklearn.svm import SVC
-from sklearn.metrics import plot_roc_curve
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.datasets import make_classification
-import matplotlib.pyplot as plt
 
 X, y = make_classification(random_state=0)
 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
@@ -73,12 +74,12 @@
 # Read more in the :ref:`User Guide `.
 from sklearn.datasets import load_iris
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import LogisticRegression
-from sklearn.preprocessing import StandardScaler
-from sklearn.pipeline import make_pipeline
 from sklearn.ensemble import StackingClassifier
+from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import LinearSVC
 
 X, y = load_iris(return_X_y=True)
 estimators = [
@@ -96,8 +97,9 @@
 # The :func:`inspection.permutation_importance` can be used to get an
 # estimate of the importance of each feature, for any fitted estimator:
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.inspection import permutation_importance
@@ -149,8 +151,9 @@
 # See more details in the :ref:`User Guide `.
 
 from tempfile import TemporaryDirectory
-from sklearn.neighbors import KNeighborsTransformer
+
 from sklearn.manifold import Isomap
+from sklearn.neighbors import KNeighborsTransformer
 from sklearn.pipeline import make_pipeline
 
 X, y = make_classification(random_state=0)
@@ -266,8 +269,8 @@ def test_sklearn_compatible_estimator(estimator, check):
 
 from sklearn.datasets import make_classification
-from sklearn.svm import SVC
 from sklearn.metrics import roc_auc_score
+from sklearn.svm import SVC
 
 X, y = make_classification(n_classes=4, n_informative=16)
 clf = SVC(decision_function_shape="ovo", probability=True).fit(X, y)
diff --git a/examples/release_highlights/plot_release_highlights_0_23_0.py b/examples/release_highlights/plot_release_highlights_0_23_0.py
index 4d6a914e01fb3..8b298b0605272 100644
--- a/examples/release_highlights/plot_release_highlights_0_23_0.py
+++ b/examples/release_highlights/plot_release_highlights_0_23_0.py
@@ -35,9 +35,10 @@
 # 'poisson' loss as well.
 
 import numpy as np
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import PoissonRegressor
+
 from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.linear_model import PoissonRegressor
+from sklearn.model_selection import train_test_split
 
 n_samples, n_features = 1000, 20
 rng = np.random.RandomState(0)
@@ -63,11 +64,11 @@
 # this feature.
 
 from sklearn import set_config
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
-from sklearn.impute import SimpleImputer
 from sklearn.compose import make_column_transformer
+from sklearn.impute import SimpleImputer
 from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
 
 set_config(display="diagram")
 
@@ -85,6 +86,8 @@
 clf = make_pipeline(preprocessor, LogisticRegression())
 clf
 
+import numpy as np
+
 ##############################################################################
 # Scalability and stability improvements to KMeans
 # ------------------------------------------------
@@ -95,11 +98,11 @@
 # effect anymore. For more details on how to control the number of threads,
 # please refer to our :ref:`parallelism` notes.
 
 import scipy
-import numpy as np
-from sklearn.model_selection import train_test_split
+
 from sklearn.cluster import KMeans
 from sklearn.datasets import make_blobs
 from sklearn.metrics import completeness_score
+from sklearn.model_selection import train_test_split
 
 rng = np.random.RandomState(0)
 X, y = make_blobs(random_state=rng)
@@ -125,9 +128,10 @@
 # effect of the first feature, instead of fitting the noise.
 
 import numpy as np
 from matplotlib import pyplot as plt
-from sklearn.model_selection import train_test_split
-from sklearn.inspection import plot_partial_dependence
+
 from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.inspection import plot_partial_dependence
+from sklearn.model_selection import train_test_split
 
 n_samples = 500
 rng = np.random.RandomState(0)
@@ -166,10 +170,11 @@
 # The two linear regressors :class:`~sklearn.linear_model.Lasso` and
 # :class:`~sklearn.linear_model.ElasticNet` now support sample weights.
 
-from sklearn.model_selection import train_test_split
+import numpy as np
+
 from sklearn.datasets import make_regression
 from sklearn.linear_model import Lasso
-import numpy as np
+from sklearn.model_selection import train_test_split
 
 n_samples, n_features = 1000, 20
 rng = np.random.RandomState(0)
diff --git a/examples/release_highlights/plot_release_highlights_0_24_0.py b/examples/release_highlights/plot_release_highlights_0_24_0.py
index 7e044db11ccd3..888e6e5af9463 100644
--- a/examples/release_highlights/plot_release_highlights_0_24_0.py
+++ b/examples/release_highlights/plot_release_highlights_0_24_0.py
@@ -51,10 +51,11 @@
 
 import numpy as np
 from scipy.stats import randint
+
+from sklearn.datasets import make_classification
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.experimental import enable_halving_search_cv  # noqa
 from sklearn.model_selection import HalvingRandomSearchCV
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.datasets import make_classification
 
 rng = np.random.RandomState(0)
 
@@ -118,6 +119,7 @@
 # Read more in the :ref:`User guide `.
 
 import numpy as np
+
 from sklearn import datasets
 from sklearn.semi_supervised import SelfTrainingClassifier
 from sklearn.svm import SVC
@@ -140,9 +142,9 @@
 # (backward selection), based on a cross-validated score maximization.
 # See the :ref:`User Guide `.
 
+from sklearn.datasets import load_iris
 from sklearn.feature_selection import SequentialFeatureSelector
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.datasets import load_iris
 
 X, y = load_iris(return_X_y=True, as_frame=True)
 feature_names = X.columns
@@ -163,11 +165,11 @@
 # :class:`~sklearn.preprocessing.PolynomialFeatures`.
 
 from sklearn.datasets import fetch_covtype
-from sklearn.pipeline import make_pipeline
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import MinMaxScaler
 from sklearn.kernel_approximation import PolynomialCountSketch
 from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import MinMaxScaler
 
 X, y = fetch_covtype(return_X_y=True)
 pipe = make_pipeline(
@@ -194,8 +196,8 @@
 # prediction on a feature for each sample separately, with one line per sample.
 # See the :ref:`User Guide `
 
-from sklearn.ensemble import RandomForestRegressor
 from sklearn.datasets import fetch_california_housing
+from sklearn.ensemble import RandomForestRegressor
 from sklearn.inspection import plot_partial_dependence
 
 X, y = fetch_california_housing(return_X_y=True, as_frame=True)
@@ -226,10 +228,11 @@
 # splitting criterion. Setting `criterion="poisson"` might be a good choice
 # if your target is a count or a frequency.
 
-from sklearn.tree import DecisionTreeRegressor
-from sklearn.model_selection import train_test_split
 import numpy as np
 
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeRegressor
+
 n_samples, n_features = 1000, 20
 rng = np.random.RandomState(0)
 X = rng.randn(n_samples, n_features)
diff --git a/examples/release_highlights/plot_release_highlights_1_0_0.py b/examples/release_highlights/plot_release_highlights_1_0_0.py
index 079d87a5d4f51..987d02c840e48 100644
--- a/examples/release_highlights/plot_release_highlights_1_0_0.py
+++ b/examples/release_highlights/plot_release_highlights_1_0_0.py
@@ -89,6 +89,7 @@
 # refer to the :ref:`User Guide `.
 
 import numpy as np
+
 from sklearn.preprocessing import SplineTransformer
 
 X = np.arange(5).reshape(5, 1)
@@ -134,6 +135,8 @@
 #    :align: center
 #    :scale: 50%
 
+import pandas as pd
+
 ##############################################################################
 # Feature Names Support
 # --------------------------------------------------------------------------
@@ -146,12 +149,13 @@
 # non-:term:`fit`, such as :term:`predict`, are consistent with features in
 # :term:`fit`:
 from sklearn.preprocessing import StandardScaler
-import pandas as pd
 
 X = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
 scalar = StandardScaler().fit(X)
 scalar.feature_names_in_
 
+import pandas as pd
+
 # %%
 # The support of :term:`get_feature_names_out` is available for transformers
 # that already had :term:`get_feature_names` and transformers with a one-to-one
@@ -162,7 +166,6 @@
 # combine feature names of its transformers:
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import OneHotEncoder
-import pandas as pd
 
 X = pd.DataFrame({"pet": ["dog", "cat", "fish"], "age": [3, 7, 1]})
 preprocessor = ColumnTransformer(
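A reordering this mechanical is easier to verify than to read hunk by hunk. isort can report whether a block is already sorted without rewriting anything; isort.check_code is part of its public API, and the snippet below is an invented example rather than code from this patch:

    import isort

    snippet = "import numpy as np\nimport matplotlib.pyplot as plt\n"
    # Returns False and, with show_diff=True, prints the change it would make;
    # nothing on disk is modified, which suits review workflows.
    print(isort.check_code(snippet, show_diff=True, profile="black"))
    # -> False: matplotlib sorts before numpy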
diff --git a/examples/release_highlights/plot_release_highlights_1_1_0.py b/examples/release_highlights/plot_release_highlights_1_1_0.py
index 7021cd2bbd821..99b669930858e 100644
--- a/examples/release_highlights/plot_release_highlights_1_1_0.py
+++ b/examples/release_highlights/plot_release_highlights_1_1_0.py
@@ -21,14 +21,15 @@
 
 """
 
+import matplotlib.pyplot as plt
+import numpy as np
+
 # %%
 # Quantile loss in :class:`ensemble.HistGradientBoostingRegressor`
 # ----------------------------------------------------------------
 # :class:`ensemble.HistGradientBoostingRegressor` can model quantiles with
 # `loss="quantile"` and the new parameter `quantile`.
 from sklearn.ensemble import HistGradientBoostingRegressor
-import numpy as np
-import matplotlib.pyplot as plt
 
 # Simple regression function for X * cos(X)
 rng = np.random.RandomState(42)
@@ -59,12 +60,12 @@
 # :class:`pipeline.Pipeline` to construct the output feature names for more complex
 # pipelines:
 from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
-from sklearn.pipeline import make_pipeline
-from sklearn.impute import SimpleImputer
-from sklearn.feature_selection import SelectKBest
 from sklearn.datasets import fetch_openml
+from sklearn.feature_selection import SelectKBest
+from sklearn.impute import SimpleImputer
 from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
 
 X, y = fetch_openml(
     "titanic", version=1, as_frame=True, return_X_y=True, parser="pandas"
@@ -100,6 +101,8 @@
 
 plt.tight_layout()
 
+import numpy as np
+
 # %%
 # Grouping infrequent categories in :class:`OneHotEncoder`
 # --------------------------------------------------------
@@ -108,7 +111,6 @@
 # categories are `min_frequency` and `max_categories`. See the
 # :ref:`User Guide ` for more details.
 from sklearn.preprocessing import OneHotEncoder
-import numpy as np
 
 X = np.array(
     [["dog"] * 5 + ["cat"] * 20 + ["rabbit"] * 10 + ["snake"] * 3], dtype=object
@@ -175,6 +177,7 @@
 # online learning when the data is not readily available from the start, or when the
 # data does not fit into memory.
 import numpy as np
+
 from sklearn.decomposition import MiniBatchNMF
 
 rng = np.random.RandomState(0)
@@ -197,6 +200,10 @@
     f"{np.sum((X - X_reconstructed) ** 2) / np.sum(X**2):.5f}",
 )
 
+import matplotlib.pyplot as plt
+
+from sklearn.cluster import BisectingKMeans, KMeans
+
 # %%
 # BisectingKMeans: divide and cluster
 # -----------------------------------
@@ -206,8 +213,6 @@
 # new clusters repeatedly until the target number of clusters is reached, giving a
 # hierarchical structure to the clustering.
 from sklearn.datasets import make_blobs
-from sklearn.cluster import KMeans, BisectingKMeans
-import matplotlib.pyplot as plt
 
 X, _ = make_blobs(n_samples=1000, centers=2, random_state=0)
 
diff --git a/examples/semi_supervised/plot_label_propagation_digits.py b/examples/semi_supervised/plot_label_propagation_digits.py
index f848e3b76e084..dc4ed674a21bc 100644
--- a/examples/semi_supervised/plot_label_propagation_digits.py
+++ b/examples/semi_supervised/plot_label_propagation_digits.py
@@ -19,13 +19,14 @@ class will be very good.
 
 # Authors: Clay Woolam
 # License: BSD
 
+import numpy as np
+
 # %%
 # Data generation
 # ---------------
 #
 # We use the digits dataset. We only use a subset of randomly selected samples.
 from sklearn import datasets
-import numpy as np
 
 digits = datasets.load_digits()
 rng = np.random.RandomState(2)
@@ -53,6 +54,8 @@ class will be very good.
 y_train = np.copy(y)
 y_train[unlabeled_set] = -1
 
+from sklearn.metrics import classification_report
+
 # %%
 # Semi-supervised learning
 # ------------------------
@@ -60,7 +63,6 @@ class will be very good.
 # We fit a :class:`~sklearn.semi_supervised.LabelSpreading` and use it to predict
 # the unknown labels.
 from sklearn.semi_supervised import LabelSpreading
-from sklearn.metrics import classification_report
 
 lp_model = LabelSpreading(gamma=0.25, max_iter=20)
 lp_model.fit(X, y_train)
diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
index 7d4a348cad9b6..9a5facc15bd71 100644
--- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
+++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
@@ -23,13 +23,13 @@
 # Authors: Clay Woolam
 # License: BSD
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from scipy import stats
 
 from sklearn import datasets
-from sklearn.semi_supervised import LabelSpreading
 from sklearn.metrics import classification_report, confusion_matrix
+from sklearn.semi_supervised import LabelSpreading
 
 digits = datasets.load_digits()
 rng = np.random.RandomState(0)
diff --git a/examples/semi_supervised/plot_label_propagation_structure.py b/examples/semi_supervised/plot_label_propagation_structure.py
index 97e8e19c2b3b3..de5a72ddf33fa 100644
--- a/examples/semi_supervised/plot_label_propagation_structure.py
+++ b/examples/semi_supervised/plot_label_propagation_structure.py
@@ -22,6 +22,7 @@
 # Here, all labels but two are tagged as unknown.
 
 import numpy as np
+
 from sklearn.datasets import make_circles
 
 n_samples = 200
diff --git a/examples/semi_supervised/plot_self_training_varying_threshold.py b/examples/semi_supervised/plot_self_training_varying_threshold.py
index 801e48b8411f5..2c7a485d06eb0 100644
--- a/examples/semi_supervised/plot_self_training_varying_threshold.py
+++ b/examples/semi_supervised/plot_self_training_varying_threshold.py
@@ -32,13 +32,14 @@
 # Authors: Oliver Rausch
 # License: BSD
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets
-from sklearn.svm import SVC
+from sklearn.metrics import accuracy_score
 from sklearn.model_selection import StratifiedKFold
 from sklearn.semi_supervised import SelfTrainingClassifier
-from sklearn.metrics import accuracy_score
+from sklearn.svm import SVC
 from sklearn.utils import shuffle
 
 n_splits = 3
diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py
index a2ffe53ca2182..08f8e477d1971 100644
--- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py
+++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py
@@ -15,15 +15,13 @@
 import numpy as np
 
 from sklearn.datasets import fetch_20newsgroups
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.feature_extraction.text import TfidfTransformer
-from sklearn.preprocessing import FunctionTransformer
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
 from sklearn.linear_model import SGDClassifier
+from sklearn.metrics import f1_score
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
-from sklearn.semi_supervised import SelfTrainingClassifier
-from sklearn.semi_supervised import LabelSpreading
-from sklearn.metrics import f1_score
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier
 
 # Loading dataset containing first five categories
 data = fetch_20newsgroups(
diff --git a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py
index 402cd41d6a0f2..766f7ea0a79c6 100644
--- a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py
+++ b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py
@@ -18,13 +18,12 @@
 # Oliver Rausch
 # License: BSD
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import datasets
+from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier
 from sklearn.svm import SVC
-from sklearn.semi_supervised import LabelSpreading
-from sklearn.semi_supervised import SelfTrainingClassifier
-
 
 iris = datasets.load_iris()
diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py
index c2c3bc6e6ba28..cacd67ed056ac 100644
--- a/examples/svm/plot_custom_kernel.py
+++ b/examples/svm/plot_custom_kernel.py
@@ -8,9 +8,10 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn import svm, datasets
+import numpy as np
+
+from sklearn import datasets, svm
 from sklearn.inspection import DecisionBoundaryDisplay
 
 # import some data to play with
diff --git a/examples/svm/plot_iris_svc.py b/examples/svm/plot_iris_svc.py
index 5931ad57c263f..d13a9fe49c803 100644
--- a/examples/svm/plot_iris_svc.py
+++ b/examples/svm/plot_iris_svc.py
@@ -35,9 +35,9 @@
 
 """
 import matplotlib.pyplot as plt
-from sklearn import svm, datasets
-from sklearn.inspection import DecisionBoundaryDisplay
 
+from sklearn import datasets, svm
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # import some data to play with
 iris = datasets.load_iris()
diff --git a/examples/svm/plot_linearsvc_support_vectors.py b/examples/svm/plot_linearsvc_support_vectors.py
index 7fdfea416013f..7f82b6c8bb0fe 100644
--- a/examples/svm/plot_linearsvc_support_vectors.py
+++ b/examples/svm/plot_linearsvc_support_vectors.py
@@ -9,11 +9,12 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import make_blobs
-from sklearn.svm import LinearSVC
 from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.svm import LinearSVC
 
 X, y = make_blobs(n_samples=40, centers=2, random_state=0)
 
diff --git a/examples/svm/plot_oneclass.py b/examples/svm/plot_oneclass.py
index 082cbcd6de2be..d4348fa0ec435 100644
--- a/examples/svm/plot_oneclass.py
+++ b/examples/svm/plot_oneclass.py
@@ -11,9 +11,10 @@
 
 """
 
-import numpy as np
-import matplotlib.pyplot as plt
 import matplotlib.font_manager
+import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import svm
 
 xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))
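The plot_rbf_parameters.py hunk just below illustrates another isort normalization: two "from sklearn.model_selection import ..." lines are folded into one sorted import. A sketch with the same two names (real isort.code API, invented input):

    import isort

    split = (
        "from sklearn.model_selection import StratifiedShuffleSplit\n"
        "from sklearn.model_selection import GridSearchCV\n"
    )
    # Same-module "from" imports merge into one line, names sorted.
    print(isort.code(split, profile="black"))
    # -> from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit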
diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py
index fa4310134487a..ba0154b477b46 100644
--- a/examples/svm/plot_rbf_parameters.py
+++ b/examples/svm/plot_rbf_parameters.py
@@ -135,9 +135,8 @@ def __call__(self, value, clip=None):
 # 10 is often helpful. Using a basis of 2, a finer
 # tuning can be achieved but at a much higher cost.
 
+from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
 from sklearn.svm import SVC
-from sklearn.model_selection import StratifiedShuffleSplit
-from sklearn.model_selection import GridSearchCV
 
 C_range = np.logspace(-2, 10, 13)
 gamma_range = np.logspace(-9, 3, 13)
diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py
index 45bacff6a2b97..23f464169f516 100644
--- a/examples/svm/plot_separating_hyperplane.py
+++ b/examples/svm/plot_separating_hyperplane.py
@@ -10,11 +10,11 @@
 
 """
 import matplotlib.pyplot as plt
+
 from sklearn import svm
 from sklearn.datasets import make_blobs
 from sklearn.inspection import DecisionBoundaryDisplay
 
-
 # we create 40 separable points
 X, y = make_blobs(n_samples=40, centers=2, random_state=6)
 
diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py
index fe71420ffd0b3..6fd7de98f3fb6 100644
--- a/examples/svm/plot_separating_hyperplane_unbalanced.py
+++ b/examples/svm/plot_separating_hyperplane_unbalanced.py
@@ -26,6 +26,7 @@
 
 """
 import matplotlib.pyplot as plt
+
 from sklearn import svm
 from sklearn.datasets import make_blobs
 from sklearn.inspection import DecisionBoundaryDisplay
diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py
index 730d6a35f35a8..d26f0eed18fa0 100644
--- a/examples/svm/plot_svm_anova.py
+++ b/examples/svm/plot_svm_anova.py
@@ -14,6 +14,7 @@
 # Load some data to play with
 # ---------------------------
 import numpy as np
+
 from sklearn.datasets import load_iris
 
 X, y = load_iris(return_X_y=True)
@@ -22,11 +23,12 @@
 rng = np.random.RandomState(0)
 X = np.hstack((X, 2 * rng.random((X.shape[0], 36))))
 
+from sklearn.feature_selection import SelectPercentile, chi2
+
 # %%
 # Create the pipeline
 # -------------------
 from sklearn.pipeline import Pipeline
-from sklearn.feature_selection import SelectPercentile, chi2
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC
 
@@ -45,6 +47,7 @@
 # Plot the cross-validation score as a function of percentile of features
 # -----------------------------------------------------------------------
 import matplotlib.pyplot as plt
+
 from sklearn.model_selection import cross_val_score
 
 score_means = list()
diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py
index dd7eb43e15231..70c1f13daad3e 100644
--- a/examples/svm/plot_svm_kernels.py
+++ b/examples/svm/plot_svm_kernels.py
@@ -14,10 +14,10 @@
 # Code source: Gaël Varoquaux
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn import svm
+import numpy as np
 
+from sklearn import svm
 
 # Our dataset and targets
 X = np.c_[
diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py
index 9f52881f1faf2..c36fcfdd05fce 100644
--- a/examples/svm/plot_svm_margin.py
+++ b/examples/svm/plot_svm_margin.py
@@ -18,9 +18,10 @@
 # Modified for documentation by Jaques Grobler
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from matplotlib import cm
+
 from sklearn import svm
 
 # we create 40 separable points
diff --git a/examples/svm/plot_svm_nonlinear.py b/examples/svm/plot_svm_nonlinear.py
index f88231b4b6af4..4990e509661a1 100644
--- a/examples/svm/plot_svm_nonlinear.py
+++ b/examples/svm/plot_svm_nonlinear.py
@@ -11,8 +11,9 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import svm
 
 xx, yy = np.meshgrid(np.linspace(-3, 3, 500), np.linspace(-3, 3, 500))
diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py
index 75a16b571c3ea..ab34528a37af6 100644
--- a/examples/svm/plot_svm_regression.py
+++ b/examples/svm/plot_svm_regression.py
@@ -7,9 +7,10 @@
 
 """
 
+import matplotlib.pyplot as plt
 import numpy as np
+
 from sklearn.svm import SVR
-import matplotlib.pyplot as plt
 
 # %%
 # Generate sample data
diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py
index b7e367e45d531..1bd15bb64d683 100644
--- a/examples/svm/plot_svm_scale_c.py
+++ b/examples/svm/plot_svm_scale_c.py
@@ -82,14 +82,13 @@
 #          Jaques Grobler
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 
+from sklearn import datasets
+from sklearn.model_selection import GridSearchCV, ShuffleSplit
 from sklearn.svm import LinearSVC
-from sklearn.model_selection import ShuffleSplit
-from sklearn.model_selection import GridSearchCV
 from sklearn.utils import check_random_state
-from sklearn import datasets
 
 rnd = check_random_state(1)
 
diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py
index e12460b494c02..be1749c0a4535 100644
--- a/examples/svm/plot_svm_tie_breaking.py
+++ b/examples/svm/plot_svm_tie_breaking.py
@@ -17,10 +17,11 @@
 # Code source: Andreas Mueller, Adrin Jalali
 # License: BSD 3 clause
 
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.svm import SVC
+import numpy as np
+
 from sklearn.datasets import make_blobs
+from sklearn.svm import SVC
 
 X, y = make_blobs(random_state=27)
 
diff --git a/examples/svm/plot_weighted_samples.py b/examples/svm/plot_weighted_samples.py
index f346599300aba..c17742e091390 100644
--- a/examples/svm/plot_weighted_samples.py
+++ b/examples/svm/plot_weighted_samples.py
@@ -14,8 +14,9 @@
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn import svm
 
 
diff --git a/examples/text/plot_document_classification_20newsgroups.py b/examples/text/plot_document_classification_20newsgroups.py
index 7f24861a0e9ce..a13fe259622cb 100644
--- a/examples/text/plot_document_classification_20newsgroups.py
+++ b/examples/text/plot_document_classification_20newsgroups.py
@@ -87,8 +87,7 @@ def size_mb(docs):
 # Extracting features from the training data using a sparse vectorizer
 from time import time
 
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.feature_extraction.text import HashingVectorizer
+from sklearn.feature_extraction.text import HashingVectorizer, TfidfVectorizer
 
 t0 = time()
 
@@ -142,6 +141,7 @@ def size_mb(docs):
 #
 # First we define small benchmarking utilities
 import numpy as np
+
 from sklearn import metrics
 from sklearn.utils.extmath import density
 
@@ -190,21 +190,22 @@ def benchmark(clf):
     return clf_descr, score, train_time, test_time
 
 
+from sklearn.ensemble import RandomForestClassifier
+
 # %%
 # We now train and test the datasets with 15 different classification
 # models and get performance results for each model.
 from sklearn.feature_selection import SelectFromModel
-from sklearn.linear_model import RidgeClassifier
+from sklearn.linear_model import (
+    PassiveAggressiveClassifier,
+    Perceptron,
+    RidgeClassifier,
+    SGDClassifier,
+)
+from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB
+from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
 from sklearn.pipeline import Pipeline
 from sklearn.svm import LinearSVC
-from sklearn.linear_model import SGDClassifier
-from sklearn.linear_model import Perceptron
-from sklearn.linear_model import PassiveAggressiveClassifier
-from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.neighbors import NearestCentroid
-from sklearn.ensemble import RandomForestClassifier
-
 
 results = []
 for clf, name in (
diff --git a/examples/text/plot_document_clustering.py b/examples/text/plot_document_clustering.py
index 24af666330e5c..c207a4b530cd6 100644
--- a/examples/text/plot_document_clustering.py
+++ b/examples/text/plot_document_clustering.py
@@ -53,24 +53,24 @@
 # Lars Buitinck
 # License: BSD 3 clause
 
-from sklearn.datasets import fetch_20newsgroups
-from sklearn.decomposition import TruncatedSVD
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.feature_extraction.text import HashingVectorizer
-from sklearn.feature_extraction.text import TfidfTransformer
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import Normalizer
-from sklearn import metrics
-
-from sklearn.cluster import KMeans, MiniBatchKMeans
-
 import logging
-from optparse import OptionParser
 import sys
+from optparse import OptionParser
 from time import time
 
 import numpy as np
 
+from sklearn import metrics
+from sklearn.cluster import KMeans, MiniBatchKMeans
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.decomposition import TruncatedSVD
+from sklearn.feature_extraction.text import (
+    HashingVectorizer,
+    TfidfTransformer,
+    TfidfVectorizer,
+)
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import Normalizer
 
 # Display progress logs on stdout
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
diff --git a/examples/text/plot_hashing_vs_dict_vectorizer.py b/examples/text/plot_hashing_vs_dict_vectorizer.py
index ce359cd137487..92092b2c078b6 100644
--- a/examples/text/plot_hashing_vs_dict_vectorizer.py
+++ b/examples/text/plot_hashing_vs_dict_vectorizer.py
@@ -19,9 +19,9 @@
 # Author: Lars Buitinck
 # License: BSD 3 clause
 
-from collections import defaultdict
 import re
 import sys
+from collections import defaultdict
 from time import time
 
 import numpy as np
diff --git a/examples/tree/plot_cost_complexity_pruning.py b/examples/tree/plot_cost_complexity_pruning.py
index d21d163c9a1e3..b232389ea9ded 100644
--- a/examples/tree/plot_cost_complexity_pruning.py
+++ b/examples/tree/plot_cost_complexity_pruning.py
@@ -18,8 +18,9 @@
 
 """
 import matplotlib.pyplot as plt
-from sklearn.model_selection import train_test_split
+
 from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeClassifier
 
 # %%
diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py
index 14f6506b5810f..15cdd36129a73 100644
--- a/examples/tree/plot_iris_dtc.py
+++ b/examples/tree/plot_iris_dtc.py
@@ -21,15 +21,15 @@
 
 iris = load_iris()
 
+import matplotlib.pyplot as plt
+
 # %%
 # Display the decision functions of trees trained on all pairs of features.
 import numpy as np
-import matplotlib.pyplot as plt
 
 from sklearn.datasets import load_iris
-from sklearn.tree import DecisionTreeClassifier
 from sklearn.inspection import DecisionBoundaryDisplay
-
+from sklearn.tree import DecisionTreeClassifier
 
 # Parameters
 n_classes = 3
diff --git a/examples/tree/plot_tree_regression.py b/examples/tree/plot_tree_regression.py
index 6ed28a5cbfa99..a819c092dbae1 100644
--- a/examples/tree/plot_tree_regression.py
+++ b/examples/tree/plot_tree_regression.py
@@ -14,10 +14,12 @@ details of the training data and learn from the noise, i.e. they overfit.
 
 """
 
+import matplotlib.pyplot as plt
+
 # Import the necessary modules and libraries
 import numpy as np
+
 from sklearn.tree import DecisionTreeRegressor
-import matplotlib.pyplot as plt
 
 # Create a random dataset
 rng = np.random.RandomState(1)
diff --git a/examples/tree/plot_tree_regression_multioutput.py b/examples/tree/plot_tree_regression_multioutput.py
index a75652a6ddd56..b6d2800d2732d 100644
--- a/examples/tree/plot_tree_regression_multioutput.py
+++ b/examples/tree/plot_tree_regression_multioutput.py
@@ -15,8 +15,9 @@ details of the training data and learn from the noise, i.e. they overfit.
 
 """
 
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.tree import DecisionTreeRegressor
 
 # Create a random dataset
diff --git a/examples/tree/plot_unveil_tree_structure.py b/examples/tree/plot_unveil_tree_structure.py
index 6313d0ccbb74f..d4009e3111f7f 100644
--- a/examples/tree/plot_unveil_tree_structure.py
+++ b/examples/tree/plot_unveil_tree_structure.py
@@ -19,10 +19,10 @@
 
 import numpy as np
 from matplotlib import pyplot as plt
 
-from sklearn.model_selection import train_test_split
+from sklearn import tree
 from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeClassifier
-from sklearn import tree
 
 ##############################################################################
 # Train tree classifier
diff --git a/maint_tools/check_pxd_in_installation.py b/maint_tools/check_pxd_in_installation.py
index b792912048350..0b1e47ca95aa7 100644
--- a/maint_tools/check_pxd_in_installation.py
+++ b/maint_tools/check_pxd_in_installation.py
@@ -6,12 +6,11 @@
 """
 
 import os
-import sys
 import pathlib
+import subprocess
+import sys
 import tempfile
 import textwrap
-import subprocess
-
 
 sklearn_dir = pathlib.Path(sys.argv[1])
 pxd_files = list(sklearn_dir.glob("**/*.pxd"))
diff --git a/maint_tools/sort_whats_new.py b/maint_tools/sort_whats_new.py
index 9a45e31322c05..c9820cc984ba5 100755
--- a/maint_tools/sort_whats_new.py
+++ b/maint_tools/sort_whats_new.py
@@ -2,8 +2,8 @@
 # Sorts what's new entries with per-module headings.
 # Pass what's new entries on stdin.
 
-import sys
 import re
+import sys
 from collections import defaultdict
 
 LABEL_ORDER = ["MajorFeature", "Feature", "Enhancement", "Efficiency", "Fix", "API"]
diff --git a/maint_tools/update_tracking_issue.py b/maint_tools/update_tracking_issue.py
index 855c733cffb31..7463e2388cdaf 100644
--- a/maint_tools/update_tracking_issue.py
+++ b/maint_tools/update_tracking_issue.py
@@ -11,9 +11,9 @@ github account that does **not** have commit access to the public repo.
""" -from pathlib import Path -import sys import argparse +import sys +from pathlib import Path import defusedxml.ElementTree as ET from github import Github diff --git a/setup.py b/setup.py index 7ad32e95e53a5..fe3a00cda3959 100755 --- a/setup.py +++ b/setup.py @@ -4,18 +4,17 @@ # 2010 Fabian Pedregosa # License: 3-clause BSD -import sys +import importlib import os import platform import shutil - -# We need to import setuptools before because it monkey-patches distutils -import setuptools # noqa +import sys +import traceback from distutils.command.clean import clean as Clean from distutils.command.sdist import sdist -import traceback -import importlib +# We need to import setuptools before because it monkey-patches distutils +import setuptools # noqa try: import builtins @@ -53,7 +52,6 @@ import sklearn._min_dependencies as min_deps # noqa from sklearn.externals._packaging.version import parse as parse_version # noqa - VERSION = sklearn.__version__ @@ -165,6 +163,7 @@ def configuration(parent_package="", top_path=None): os.remove("MANIFEST") from numpy.distutils.misc_util import Configuration + from sklearn._build_utils import _check_cython_version config = Configuration(None, parent_package, top_path) @@ -305,7 +304,7 @@ def setup_package(): # These commands require the setup from numpy.distutils because they # may use numpy.distutils compiler classes. - from numpy.distutils.core import setup + from distutils.ccompiler import CCompiler # Monkeypatches CCompiler.spawn to prevent random wheel build errors on Windows # The build errors on Windows was because msvccompiler spawn was not threadsafe @@ -314,7 +313,8 @@ def setup_package(): # https://github.com/scikit-learn/scikit-learn/issues/22310 # https://github.com/numpy/numpy/pull/20640 from numpy.distutils.ccompiler import replace_method - from distutils.ccompiler import CCompiler + from numpy.distutils.core import setup + from sklearn.externals._numpy_compiler_patch import CCompiler_spawn replace_method(CCompiler, "spawn", CCompiler_spawn) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 097501b0c5c6a..71e70d08b7924 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -12,13 +12,12 @@ See http://scikit-learn.org for complete documentation. """ -import sys import logging import os import random +import sys - -from ._config import get_config, set_config, config_context +from ._config import config_context, get_config, set_config logger = logging.getLogger(__name__) @@ -77,8 +76,8 @@ # It is necessary to do this prior to importing show_versions as the # later is linked to the OpenMP runtime to make it possible to introspect # it and importing it first would fail if the OpenMP dll cannot be found. - from . import _distributor_init # noqa: F401 from . import __check_build # noqa: F401 + from . 
diff --git a/sklearn/__init__.py b/sklearn/__init__.py
index 097501b0c5c6a..71e70d08b7924 100644
--- a/sklearn/__init__.py
+++ b/sklearn/__init__.py
@@ -12,13 +12,12 @@
 See http://scikit-learn.org for complete documentation.
 """
-import sys
 import logging
 import os
 import random
+import sys
 
-
-from ._config import get_config, set_config, config_context
+from ._config import config_context, get_config, set_config
 
 logger = logging.getLogger(__name__)
 
@@ -77,8 +76,8 @@
     # It is necessary to do this prior to importing show_versions as the
     # later is linked to the OpenMP runtime to make it possible to introspect
     # it and importing it first would fail if the OpenMP dll cannot be found.
-    from . import _distributor_init  # noqa: F401
     from . import __check_build  # noqa: F401
+    from . import _distributor_init  # noqa: F401
 
     from .base import clone
     from .utils._show_versions import show_versions
diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py
index d8206a3a715f8..8a9d62050088d 100644
--- a/sklearn/_build_utils/__init__.py
+++ b/sklearn/_build_utils/__init__.py
@@ -5,16 +5,15 @@
 # license: BSD
 
-import os
-import sklearn
 import contextlib
-
+import os
 from distutils.version import LooseVersion
 
-from .pre_build_helpers import basic_check_build
-from .openmp_helpers import check_openmp_support
-from .._min_dependencies import CYTHON_MIN_VERSION
+import sklearn
 
+from .._min_dependencies import CYTHON_MIN_VERSION
+from .openmp_helpers import check_openmp_support
+from .pre_build_helpers import basic_check_build
 
 DEFAULT_ROOT = "sklearn"
diff --git a/sklearn/_build_utils/openmp_helpers.py b/sklearn/_build_utils/openmp_helpers.py
index 192e96cd30765..cd16db9684bbb 100644
--- a/sklearn/_build_utils/openmp_helpers.py
+++ b/sklearn/_build_utils/openmp_helpers.py
@@ -5,11 +5,10 @@
 
 import os
+import subprocess
 import sys
 import textwrap
 import warnings
-import subprocess
-
 from distutils.errors import CompileError, LinkError
 
 from .pre_build_helpers import compile_test_program
diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py
index 0a2a942f7991e..01ab527d4e67a 100644
--- a/sklearn/_build_utils/pre_build_helpers.py
+++ b/sklearn/_build_utils/pre_build_helpers.py
@@ -1,17 +1,17 @@
 """Helpers to check build environment before actual build of scikit-learn"""
+import glob
 import os
+import subprocess
 import sys
-import glob
 import tempfile
 import textwrap
-import setuptools  # noqa
-import subprocess
 import warnings
-
 from distutils.dist import Distribution
 from distutils.sysconfig import customize_compiler
 
+import setuptools  # noqa
+
 # NumPy 1.23 deprecates numpy.distutils
 with warnings.catch_warnings():
     warnings.filterwarnings("ignore", category=DeprecationWarning)
diff --git a/sklearn/_config.py b/sklearn/_config.py
index c865b879dbea3..832ead13df655 100644
--- a/sklearn/_config.py
+++ b/sklearn/_config.py
@@ -1,8 +1,8 @@
 """Global configuration state and functions for management
 """
 import os
-from contextlib import contextmanager as contextmanager
 import threading
+from contextlib import contextmanager as contextmanager
 
 _global_config = {
     "assume_finite": bool(os.environ.get("SKLEARN_ASSUME_FINITE", False)),
diff --git a/sklearn/_isotonic.pyx b/sklearn/_isotonic.pyx
index 34b6871f133e8..21dd4909d79e4 100644
--- a/sklearn/_isotonic.pyx
+++ b/sklearn/_isotonic.pyx
@@ -5,8 +5,9 @@
 # pool at each step.
 
 import numpy as np
-cimport numpy as cnp
+
 cimport cython
+cimport numpy as cnp
 from cython cimport floating
 
 cnp.import_array()
diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py
index 63ae3038df8ae..7090d1a6a5c83 100644
--- a/sklearn/_loss/__init__.py
+++ b/sklearn/_loss/__init__.py
@@ -4,18 +4,17 @@
 """
 from .loss import (
-    HalfSquaredError,
     AbsoluteError,
-    PinballLoss,
-    HalfPoissonLoss,
+    HalfBinomialLoss,
     HalfGammaLoss,
+    HalfMultinomialLoss,
+    HalfPoissonLoss,
+    HalfSquaredError,
     HalfTweedieLoss,
     HalfTweedieLossIdentity,
-    HalfBinomialLoss,
-    HalfMultinomialLoss,
+    PinballLoss,
 )
-
 __all__ = [
     "HalfSquaredError",
     "AbsoluteError",
diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py
index 6fbe675fef533..4542a83bef559 100644
--- a/sklearn/_loss/glm_distribution.py
+++ b/sklearn/_loss/glm_distribution.py
@@ -9,14 +9,13 @@
 # This is only used for backward compatibility in _GeneralizedLinearRegressor
 # for the deprecated family attribute.
 
+import numbers
 from abc import ABCMeta, abstractmethod
 from collections import namedtuple
-import numbers
 
 import numpy as np
 from scipy.special import xlogy
 
-
 DistributionBoundary = namedtuple("DistributionBoundary", ("value", "inclusive"))
 
diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py
index 4cb46a15ef263..3e7c242724b9d 100644
--- a/sklearn/_loss/link.py
+++ b/sklearn/_loss/link.py
@@ -9,6 +9,7 @@
 import numpy as np
 from scipy.special import expit, logit
 from scipy.stats import gmean
+
 from ..utils.extmath import softmax
 
diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py
index ad52ffd438238..d17f55fa6a817 100644
--- a/sklearn/_loss/loss.py
+++ b/sklearn/_loss/loss.py
@@ -16,29 +16,25 @@
 # - Replace link module of GLMs.
 
 import numbers
+
 import numpy as np
 from scipy.special import xlogy
+
+from ..utils import check_scalar
+from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
+from ..utils.stats import _weighted_percentile
 from ._loss import (
-    CyHalfSquaredError,
     CyAbsoluteError,
-    CyPinballLoss,
-    CyHalfPoissonLoss,
+    CyHalfBinomialLoss,
     CyHalfGammaLoss,
+    CyHalfMultinomialLoss,
+    CyHalfPoissonLoss,
+    CyHalfSquaredError,
     CyHalfTweedieLoss,
     CyHalfTweedieLossIdentity,
-    CyHalfBinomialLoss,
-    CyHalfMultinomialLoss,
-)
-from .link import (
-    Interval,
-    IdentityLink,
-    LogLink,
-    LogitLink,
-    MultinomialLogit,
+    CyPinballLoss,
 )
-from ..utils import check_scalar
-from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
-from ..utils.stats import _weighted_percentile
+from .link import IdentityLink, Interval, LogitLink, LogLink, MultinomialLogit
 
 # Note: The shape of raw_prediction for multiclass classifications are
diff --git a/sklearn/_loss/setup.py b/sklearn/_loss/setup.py
index 2a2d2b5f13b8a..4be1481df1a7f 100644
--- a/sklearn/_loss/setup.py
+++ b/sklearn/_loss/setup.py
@@ -1,5 +1,6 @@
 import numpy
 from numpy.distutils.misc_util import Configuration
+
 from sklearn._build_utils import gen_from_templates
 
diff --git a/sklearn/_loss/tests/test_glm_distribution.py b/sklearn/_loss/tests/test_glm_distribution.py
index aaaa9de39a502..35eae4f5f7418 100644
--- a/sklearn/_loss/tests/test_glm_distribution.py
+++ b/sklearn/_loss/tests/test_glm_distribution.py
@@ -4,20 +4,17 @@
 #
 # TODO(1.3): remove file
 import numpy as np
-from numpy.testing import (
-    assert_allclose,
-    assert_array_equal,
-)
-from scipy.optimize import check_grad
 import pytest
+from numpy.testing import assert_allclose, assert_array_equal
+from scipy.optimize import check_grad
 
 from sklearn._loss.glm_distribution import (
-    TweedieDistribution,
-    NormalDistribution,
-    PoissonDistribution,
+    DistributionBoundary,
     GammaDistribution,
     InverseGaussianDistribution,
-    DistributionBoundary,
+    NormalDistribution,
+    PoissonDistribution,
+    TweedieDistribution,
 )
 
diff --git a/sklearn/_loss/tests/test_link.py b/sklearn/_loss/tests/test_link.py
index 435361eaa50f1..4311293aeda41 100644
--- a/sklearn/_loss/tests/test_link.py
+++ b/sklearn/_loss/tests/test_link.py
@@ -1,14 +1,8 @@
 import numpy as np
-from numpy.testing import assert_allclose, assert_array_equal
 import pytest
+from numpy.testing import assert_allclose, assert_array_equal
 
-from sklearn._loss.link import (
-    _LINKS,
-    _inclusive_low_high,
-    MultinomialLogit,
-    Interval,
-)
-
+from sklearn._loss.link import _LINKS, Interval, MultinomialLogit, _inclusive_low_high
 
 LINK_FUNCTIONS = list(_LINKS.values())
 
diff --git a/sklearn/_loss/tests/test_loss.py b/sklearn/_loss/tests/test_loss.py
index 8aeb350440005..90c7149c092b7 100644
--- a/sklearn/_loss/tests/test_loss.py
+++ b/sklearn/_loss/tests/test_loss.py
@@ -1,22 +1,17 @@
 import pickle
 
 import numpy as np
-from numpy.testing import assert_allclose, assert_array_equal
 import pytest
+from numpy.testing import assert_allclose, assert_array_equal
 from pytest import approx
-from scipy.optimize import (
-    minimize,
-    minimize_scalar,
-    newton,
-    LinearConstraint,
-)
+from scipy.optimize import LinearConstraint, minimize, minimize_scalar, newton
 from scipy.special import logsumexp
 
-from sklearn._loss.link import _inclusive_low_high, IdentityLink
+from sklearn._loss.link import IdentityLink, _inclusive_low_high
 from sklearn._loss.loss import (
     _LOSSES,
-    BaseLoss,
     AbsoluteError,
+    BaseLoss,
     HalfBinomialLoss,
     HalfGammaLoss,
     HalfMultinomialLoss,
@@ -29,7 +24,6 @@
 from sklearn.utils import assert_all_finite
 from sklearn.utils._testing import create_memmap_backed_data, skip_if_32bit
 
-
 ALL_LOSSES = list(_LOSSES.values())
 
 LOSS_INSTANCES = [loss() for loss in ALL_LOSSES]
diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py
index 957e1e01f0551..b85f99869ea66 100644
--- a/sklearn/_min_dependencies.py
+++ b/sklearn/_min_dependencies.py
@@ -1,7 +1,6 @@
 """All minimum dependencies for scikit-learn."""
-import platform
 import argparse
-
+import platform
 
 # scipy and cython should by in sync with pyproject.toml
 
diff --git a/sklearn/base.py b/sklearn/base.py
index 757fb0e23841a..c49624a99a07e 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -4,29 +4,29 @@
 # License: BSD 3 clause
 
 import copy
-import warnings
-from collections import defaultdict
-import platform
 import inspect
+import platform
 import re
+import warnings
+from collections import defaultdict
 
 import numpy as np
 
 from . import __version__
 from ._config import get_config
 from .utils import _IS_32BIT
-from .utils._tags import (
-    _DEFAULT_TAGS,
-)
-from .utils.validation import check_X_y
-from .utils.validation import check_array
-from .utils.validation import _check_y
-from .utils.validation import _num_features
-from .utils.validation import _check_feature_names_in
-from .utils.validation import _generate_get_feature_names_out
-from .utils.validation import check_is_fitted
 from .utils._estimator_html_repr import estimator_html_repr
-from .utils.validation import _get_feature_names
+from .utils._tags import _DEFAULT_TAGS
+from .utils.validation import (
+    _check_feature_names_in,
+    _check_y,
+    _generate_get_feature_names_out,
+    _get_feature_names,
+    _num_features,
+    check_array,
+    check_is_fitted,
+    check_X_y,
+)
 
 
 def clone(estimator, *, safe=True):
diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index f84ff898fc2ee..0e7a66e0022d2 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -8,34 +8,33 @@
 # License: BSD 3 clause
 
 import warnings
-from inspect import signature
 from functools import partial
-
+from inspect import signature
 from math import log
 
-import numpy as np
-from joblib import Parallel
-from scipy.special import expit
-from scipy.special import xlogy
+import numpy as np
 from scipy.optimize import fmin_bfgs
+from scipy.special import expit, xlogy
+
+from joblib import Parallel
 
 from .base import (
     BaseEstimator,
     ClassifierMixin,
+    MetaEstimatorMixin,
     RegressorMixin,
     clone,
-    MetaEstimatorMixin,
     is_classifier,
 )
-from .preprocessing import label_binarize, LabelEncoder
-from .utils import (
-    column_or_1d,
-    indexable,
-    check_matplotlib_support,
-)
-
-from .utils.multiclass import check_classification_targets
+from .isotonic import IsotonicRegression
+from .metrics._base import _check_pos_label_consistency
+from .metrics._plot.base import _get_response
+from .model_selection import check_cv, cross_val_predict
+from .preprocessing import LabelEncoder, label_binarize
+from .svm import LinearSVC
+from .utils import _safe_indexing, check_matplotlib_support, column_or_1d, indexable
 from .utils.fixes import delayed
+from .utils.multiclass import check_classification_targets
 from .utils.validation import (
     _check_fit_params,
     _check_sample_weight,
@@ -43,12 +42,6 @@
     check_consistent_length,
     check_is_fitted,
 )
-from .utils import _safe_indexing
-from .isotonic import IsotonicRegression
-from .svm import LinearSVC
-from .model_selection import check_cv, cross_val_predict
-from .metrics._base import _check_pos_label_consistency
-from .metrics._plot.base import _get_response
 
 
 class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
diff --git a/sklearn/cluster/__init__.py b/sklearn/cluster/__init__.py
index 9ba72d341c389..13f42c6aa7a9c 100644
--- a/sklearn/cluster/__init__.py
+++ b/sklearn/cluster/__init__.py
@@ -3,26 +3,26 @@
 algorithms.
""" -from ._spectral import spectral_clustering, SpectralClustering -from ._mean_shift import mean_shift, MeanShift, estimate_bandwidth, get_bin_seeds -from ._affinity_propagation import affinity_propagation, AffinityPropagation +from ._affinity_propagation import AffinityPropagation, affinity_propagation from ._agglomerative import ( - ward_tree, AgglomerativeClustering, - linkage_tree, FeatureAgglomeration, + linkage_tree, + ward_tree, ) -from ._kmeans import k_means, KMeans, MiniBatchKMeans, kmeans_plusplus +from ._bicluster import SpectralBiclustering, SpectralCoclustering +from ._birch import Birch from ._bisect_k_means import BisectingKMeans -from ._dbscan import dbscan, DBSCAN +from ._dbscan import DBSCAN, dbscan +from ._kmeans import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus +from ._mean_shift import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift from ._optics import ( OPTICS, cluster_optics_dbscan, - compute_optics_graph, cluster_optics_xi, + compute_optics_graph, ) -from ._bicluster import SpectralBiclustering, SpectralCoclustering -from ._birch import Birch +from ._spectral import SpectralClustering, spectral_clustering __all__ = [ "AffinityPropagation", diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index f0274b113a341..3dd8dff4c8981 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -10,14 +10,12 @@ import numpy as np -from ..exceptions import ConvergenceWarning +from .._config import config_context from ..base import BaseEstimator, ClusterMixin -from ..utils import as_float_array, check_random_state -from ..utils import check_scalar +from ..exceptions import ConvergenceWarning +from ..metrics import euclidean_distances, pairwise_distances_argmin +from ..utils import as_float_array, check_random_state, check_scalar from ..utils.validation import check_is_fitted -from ..metrics import euclidean_distances -from ..metrics import pairwise_distances_argmin -from .._config import config_context def _equal_similarities_and_preferences(S, preference): diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index b399f805a9d40..4078264144f3e 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -15,9 +15,9 @@ from scipy.sparse.csgraph import connected_components from ..base import BaseEstimator, ClusterMixin, _ClassNamePrefixFeaturesOutMixin -from ..metrics.pairwise import paired_distances from ..metrics import DistanceMetric from ..metrics._dist_metrics import METRIC_MAPPING +from ..metrics.pairwise import paired_distances from ..utils import check_array from ..utils._fast_dict import IntFloatDict from ..utils.graph import _fix_connected_components diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index a360802009f2c..c336d82dd6a48 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -2,24 +2,19 @@ # Authors : Kemal Eren # License: BSD 3 clause +import numbers from abc import ABCMeta, abstractmethod import numpy as np -import numbers - from scipy.linalg import norm from scipy.sparse import dia_matrix, issparse from scipy.sparse.linalg import eigsh, svds -from . 
import KMeans, MiniBatchKMeans from ..base import BaseEstimator, BiclusterMixin -from ..utils import check_random_state -from ..utils import check_scalar - +from ..utils import check_random_state, check_scalar from ..utils.extmath import make_nonnegative, randomized_svd, safe_sparse_dot - from ..utils.validation import assert_all_finite - +from ._kmeans import KMeans, MiniBatchKMeans __all__ = ["SpectralCoclustering", "SpectralBiclustering"] diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 2bfdd2971e4d4..0e9c2ba616d99 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -3,26 +3,27 @@ # Joel Nothman # License: BSD 3 clause -import warnings import numbers +import warnings +from math import sqrt + import numpy as np from scipy import sparse -from math import sqrt -from ..metrics import pairwise_distances_argmin -from ..metrics.pairwise import euclidean_distances +from .._config import config_context from ..base import ( - TransformerMixin, - ClusterMixin, BaseEstimator, + ClusterMixin, + TransformerMixin, _ClassNamePrefixFeaturesOutMixin, ) -from ..utils.extmath import row_norms +from ..exceptions import ConvergenceWarning +from ..metrics import pairwise_distances_argmin +from ..metrics.pairwise import euclidean_distances from ..utils import check_scalar, deprecated +from ..utils.extmath import row_norms from ..utils.validation import check_is_fitted -from ..exceptions import ConvergenceWarning from . import AgglomerativeClustering -from .._config import config_context def _iterate_sparse_X(X): diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index c7dc2c5a772e5..d1e1be281de75 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -6,18 +6,21 @@ import numpy as np import scipy.sparse as sp -from ._kmeans import _BaseKMeans -from ._kmeans import _kmeans_single_elkan -from ._kmeans import _kmeans_single_lloyd -from ._kmeans import _labels_inertia_threadpool_limit -from ._k_means_common import _inertia_dense -from ._k_means_common import _inertia_sparse -from ..utils.extmath import row_norms from ..utils._openmp_helpers import _openmp_effective_n_threads -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..utils.validation import check_random_state -from ..utils.validation import _is_arraylike_not_scalar +from ..utils.extmath import row_norms +from ..utils.validation import ( + _check_sample_weight, + _is_arraylike_not_scalar, + check_is_fitted, + check_random_state, +) +from ._k_means_common import _inertia_dense, _inertia_sparse +from ._kmeans import ( + _BaseKMeans, + _kmeans_single_elkan, + _kmeans_single_lloyd, + _labels_inertia_threadpool_limit, +) class _BisectingTree: diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index f5d5bc81e6bba..7975d7e9833ae 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -8,16 +8,16 @@ # # License: BSD 3 clause -import numpy as np import numbers import warnings + +import numpy as np from scipy import sparse -from ..utils import check_scalar from ..base import BaseEstimator, ClusterMixin -from ..utils.validation import _check_sample_weight from ..neighbors import NearestNeighbors - +from ..utils import check_scalar +from ..utils.validation import _check_sample_weight from ._dbscan_inner import dbscan_inner diff --git a/sklearn/cluster/_dbscan_inner.pyx b/sklearn/cluster/_dbscan_inner.pyx index 17ef3f1703a8b..22dc29517170f 100644 --- 
a/sklearn/cluster/_dbscan_inner.pyx +++ b/sklearn/cluster/_dbscan_inner.pyx @@ -2,8 +2,8 @@ # Author: Lars Buitinck # License: 3-clause BSD -from libcpp.vector cimport vector cimport numpy as cnp +from libcpp.vector cimport vector cnp.import_array() diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index 457a83dd41e71..4d392de5f7fc0 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -6,10 +6,10 @@ # License: BSD 3 clause import numpy as np +from scipy.sparse import issparse from ..base import TransformerMixin from ..utils.validation import check_is_fitted -from scipy.sparse import issparse ############################################################################### # Mixin class for feature agglomeration. diff --git a/sklearn/cluster/_hierarchical_fast.pyx b/sklearn/cluster/_hierarchical_fast.pyx index 3ca48c8b7fc2c..b164d678aa788 100644 --- a/sklearn/cluster/_hierarchical_fast.pyx +++ b/sklearn/cluster/_hierarchical_fast.pyx @@ -1,8 +1,9 @@ # Author: Gael Varoquaux import numpy as np -cimport numpy as cnp + cimport cython +cimport numpy as cnp ctypedef cnp.float64_t DOUBLE ctypedef cnp.npy_intp INTP @@ -10,13 +11,14 @@ ctypedef cnp.int8_t INT8 cnp.import_array() -from ..metrics._dist_metrics cimport DistanceMetric -from ..utils._fast_dict cimport IntFloatDict - # C++ -from cython.operator cimport dereference as deref, preincrement as inc -from libcpp.map cimport map as cpp_map +from cython.operator cimport dereference as deref +from cython.operator cimport preincrement as inc from libc.math cimport fmax +from libcpp.map cimport map as cpp_map + +from ..metrics._dist_metrics cimport DistanceMetric +from ..utils._fast_dict cimport IntFloatDict DTYPE = np.float64 ctypedef cnp.float64_t DTYPE_t diff --git a/sklearn/cluster/_k_means_common.pyx b/sklearn/cluster/_k_means_common.pyx index 69acdb8410ad6..1cdcaa703f972 100644 --- a/sklearn/cluster/_k_means_common.pyx +++ b/sklearn/cluster/_k_means_common.pyx @@ -9,13 +9,13 @@ # provided by the user). This is fixed in cython > 0.3. import numpy as np + from cython cimport floating from cython.parallel cimport prange from libc.math cimport sqrt from ..utils.extmath import row_norms - # Number of samples per data chunk defined as a global constant. 
CHUNK_SIZE = 256 diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx index ab30a500a2a7d..c357ed2d7217a 100644 --- a/sklearn/cluster/_k_means_elkan.pyx +++ b/sklearn/cluster/_k_means_elkan.pyx @@ -8,20 +8,26 @@ IF SKLEARN_OPENMP_PARALLELISM_ENABLED: cimport openmp + from cython cimport floating -from cython.parallel import prange, parallel + +from cython.parallel import parallel, prange + from libc.math cimport sqrt from libc.stdlib cimport calloc, free -from libc.string cimport memset, memcpy +from libc.string cimport memcpy, memset from ..utils.extmath import row_norms from ._k_means_common import CHUNK_SIZE -from ._k_means_common cimport _relocate_empty_clusters_dense -from ._k_means_common cimport _relocate_empty_clusters_sparse -from ._k_means_common cimport _euclidean_dense_dense -from ._k_means_common cimport _euclidean_sparse_dense -from ._k_means_common cimport _average_centers -from ._k_means_common cimport _center_shift + +from ._k_means_common cimport ( + _average_centers, + _center_shift, + _euclidean_dense_dense, + _euclidean_sparse_dense, + _relocate_empty_clusters_dense, + _relocate_empty_clusters_sparse, +) def init_bounds_dense( diff --git a/sklearn/cluster/_k_means_lloyd.pyx b/sklearn/cluster/_k_means_lloyd.pyx index 55600f2910b05..e7eb73f3a3613 100644 --- a/sklearn/cluster/_k_means_lloyd.pyx +++ b/sklearn/cluster/_k_means_lloyd.pyx @@ -6,19 +6,27 @@ IF SKLEARN_OPENMP_PARALLELISM_ENABLED: cimport openmp + from cython cimport floating -from cython.parallel import prange, parallel -from libc.stdlib cimport malloc, calloc, free -from libc.string cimport memset + +from cython.parallel import parallel, prange + from libc.float cimport DBL_MAX, FLT_MAX +from libc.stdlib cimport calloc, free, malloc +from libc.string cimport memset from ..utils.extmath import row_norms -from ..utils._cython_blas cimport _gemm -from ..utils._cython_blas cimport RowMajor, Trans, NoTrans + +from ..utils._cython_blas cimport NoTrans, RowMajor, Trans, _gemm + from ._k_means_common import CHUNK_SIZE -from ._k_means_common cimport _relocate_empty_clusters_dense -from ._k_means_common cimport _relocate_empty_clusters_sparse -from ._k_means_common cimport _average_centers, _center_shift + +from ._k_means_common cimport ( + _average_centers, + _center_shift, + _relocate_empty_clusters_dense, + _relocate_empty_clusters_sparse, +) def lloyd_iter_chunked_dense( diff --git a/sklearn/cluster/_k_means_minibatch.pyx b/sklearn/cluster/_k_means_minibatch.pyx index b7bd4b1409284..ebfdcf2dac687 100644 --- a/sklearn/cluster/_k_means_minibatch.pyx +++ b/sklearn/cluster/_k_means_minibatch.pyx @@ -4,7 +4,7 @@ from cython cimport floating from cython.parallel cimport parallel, prange -from libc.stdlib cimport malloc, free +from libc.stdlib cimport free, malloc def _minibatch_update_dense( diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index eca8a5c2dc3ce..b6f974d6191a0 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -11,8 +11,8 @@ # Robert Layton # License: BSD 3 clause -from abc import ABC, abstractmethod import warnings +from abc import ABC, abstractmethod import numpy as np import scipy.sparse as sp @@ -23,33 +23,34 @@ TransformerMixin, _ClassNamePrefixFeaturesOutMixin, ) -from ..metrics.pairwise import euclidean_distances -from ..metrics.pairwise import _euclidean_distances -from ..utils.extmath import row_norms, stable_cumsum -from ..utils.fixes import threadpool_limits -from ..utils.fixes import threadpool_info -from 
..utils.sparsefuncs_fast import assign_rows_csr -from ..utils.sparsefuncs import mean_variance_axis -from ..utils import check_array -from ..utils import check_random_state -from ..utils.validation import check_is_fitted, _check_sample_weight -from ..utils.validation import _is_arraylike_not_scalar +from ..exceptions import ConvergenceWarning +from ..metrics.pairwise import _euclidean_distances, euclidean_distances +from ..utils import check_array, check_random_state from ..utils._openmp_helpers import _openmp_effective_n_threads from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper -from ..exceptions import ConvergenceWarning -from ._k_means_common import CHUNK_SIZE -from ._k_means_common import _inertia_dense -from ._k_means_common import _inertia_sparse -from ._k_means_common import _is_same_clustering -from ._k_means_minibatch import _minibatch_update_dense -from ._k_means_minibatch import _minibatch_update_sparse -from ._k_means_lloyd import lloyd_iter_chunked_dense -from ._k_means_lloyd import lloyd_iter_chunked_sparse -from ._k_means_elkan import init_bounds_dense -from ._k_means_elkan import init_bounds_sparse -from ._k_means_elkan import elkan_iter_chunked_dense -from ._k_means_elkan import elkan_iter_chunked_sparse - +from ..utils.extmath import row_norms, stable_cumsum +from ..utils.fixes import threadpool_info, threadpool_limits +from ..utils.sparsefuncs import mean_variance_axis +from ..utils.sparsefuncs_fast import assign_rows_csr +from ..utils.validation import ( + _check_sample_weight, + _is_arraylike_not_scalar, + check_is_fitted, +) +from ._k_means_common import ( + CHUNK_SIZE, + _inertia_dense, + _inertia_sparse, + _is_same_clustering, +) +from ._k_means_elkan import ( + elkan_iter_chunked_dense, + elkan_iter_chunked_sparse, + init_bounds_dense, + init_bounds_sparse, +) +from ._k_means_lloyd import lloyd_iter_chunked_dense, lloyd_iter_chunked_sparse +from ._k_means_minibatch import _minibatch_update_dense, _minibatch_update_sparse ############################################################################### # Initialization heuristic diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index c686ab9895425..cfe2b94e9072e 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -14,18 +14,20 @@ # Gael Varoquaux # Martino Sorbaro -import numpy as np import warnings +from collections import defaultdict + +import numpy as np + from joblib import Parallel -from collections import defaultdict -from ..utils.validation import check_is_fitted -from ..utils.fixes import delayed -from ..utils import check_random_state, gen_batches, check_array +from .._config import config_context from ..base import BaseEstimator, ClusterMixin -from ..neighbors import NearestNeighbors from ..metrics.pairwise import pairwise_distances_argmin -from .._config import config_context +from ..neighbors import NearestNeighbors +from ..utils import check_array, check_random_state, gen_batches +from ..utils.fixes import delayed +from ..utils.validation import check_is_fitted def estimate_bandwidth(X, *, quantile=0.3, n_samples=None, random_state=0, n_jobs=None): diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index a6b159ef5c5a0..c477de3c50997 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -11,16 +11,17 @@ """ import warnings + import numpy as np +from scipy.sparse import SparseEfficiencyWarning, issparse +from ..base import BaseEstimator, ClusterMixin from ..exceptions import 
DataConversionWarning +from ..metrics import pairwise_distances from ..metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS +from ..neighbors import NearestNeighbors from ..utils import gen_batches, get_chunk_n_rows from ..utils.validation import check_memory -from ..neighbors import NearestNeighbors -from ..base import BaseEstimator, ClusterMixin -from ..metrics import pairwise_distances -from scipy.sparse import issparse, SparseEfficiencyWarning class OPTICS(ClusterMixin, BaseEstimator): diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index 390b567c0d0bb..001af1f2a3b5e 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -10,15 +10,14 @@ import warnings import numpy as np - from scipy.linalg import LinAlgError, qr, svd from scipy.sparse import csc_matrix from ..base import BaseEstimator, ClusterMixin -from ..utils import check_random_state, as_float_array, check_scalar -from ..metrics.pairwise import pairwise_kernels -from ..neighbors import kneighbors_graph, NearestNeighbors from ..manifold import spectral_embedding +from ..metrics.pairwise import pairwise_kernels +from ..neighbors import NearestNeighbors, kneighbors_graph +from ..utils import as_float_array, check_random_state, check_scalar from ._kmeans import k_means diff --git a/sklearn/cluster/tests/common.py b/sklearn/cluster/tests/common.py index 0f4bd9e14926d..b1fe047fe230a 100644 --- a/sklearn/cluster/tests/common.py +++ b/sklearn/cluster/tests/common.py @@ -5,7 +5,6 @@ import numpy as np - ############################################################################### # Generate sample data diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index e5dc5d584266d..c5f5db6a72b1a 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -3,20 +3,18 @@ """ -import numpy as np -import pytest import warnings +import numpy as np +import pytest from scipy.sparse import csr_matrix -from sklearn.exceptions import ConvergenceWarning -from sklearn.utils._testing import assert_array_equal - -from sklearn.cluster import AffinityPropagation +from sklearn.cluster import AffinityPropagation, affinity_propagation from sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences -from sklearn.cluster import affinity_propagation from sklearn.datasets import make_blobs +from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import euclidean_distances +from sklearn.utils._testing import assert_array_equal n_clusters = 3 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 184fe3891804e..38cbbffc84154 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -4,23 +4,21 @@ import pytest from scipy.sparse import csr_matrix, issparse -from sklearn.model_selection import ParameterGrid - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - from sklearn.base import BaseEstimator, BiclusterMixin - -from sklearn.cluster import SpectralCoclustering -from sklearn.cluster import SpectralBiclustering -from sklearn.cluster._bicluster import _scale_normalize -from sklearn.cluster._bicluster import _bistochastic_normalize -from sklearn.cluster._bicluster import _log_normalize - -from 
sklearn.metrics import consensus_score, v_measure_score - +from sklearn.cluster import SpectralBiclustering, SpectralCoclustering +from sklearn.cluster._bicluster import ( + _bistochastic_normalize, + _log_normalize, + _scale_normalize, +) from sklearn.datasets import make_biclusters, make_checkerboard +from sklearn.metrics import consensus_score, v_measure_score +from sklearn.model_selection import ParameterGrid +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) class MockBiclustering(BiclusterMixin, BaseEstimator): diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index c5d88c2bc6f0e..38b672ceb6348 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -2,22 +2,22 @@ Tests for the birch clustering algorithm. """ -from scipy import sparse import numpy as np import pytest +from scipy import sparse +from sklearn.cluster import AgglomerativeClustering, Birch from sklearn.cluster.tests.common import generate_clustered_data -from sklearn.cluster import Birch -from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import make_blobs from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import ElasticNet from sklearn.metrics import pairwise_distances_argmin, v_measure_score - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def test_n_samples_leaves_roots(): diff --git a/sklearn/cluster/tests/test_bisect_k_means.py b/sklearn/cluster/tests/test_bisect_k_means.py index a9904e61de04b..48ba872d415fb 100644 --- a/sklearn/cluster/tests/test_bisect_k_means.py +++ b/sklearn/cluster/tests/test_bisect_k_means.py @@ -2,8 +2,8 @@ import pytest import scipy.sparse as sp -from sklearn.utils._testing import assert_array_equal, assert_allclose from sklearn.cluster import BisectingKMeans +from sklearn.utils._testing import assert_allclose, assert_array_equal @pytest.mark.parametrize("bisecting_strategy", ["biggest_inertia", "largest_cluster"]) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index b3b58b7a79b4b..aec8a8d789cbf 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -3,23 +3,18 @@ """ import pickle - -import numpy as np - import warnings -from scipy.spatial import distance -from scipy import sparse - +import numpy as np import pytest +from scipy import sparse +from scipy.spatial import distance -from sklearn.utils._testing import assert_array_equal -from sklearn.neighbors import NearestNeighbors -from sklearn.cluster import DBSCAN -from sklearn.cluster import dbscan +from sklearn.cluster import DBSCAN, dbscan from sklearn.cluster.tests.common import generate_clustered_data from sklearn.metrics.pairwise import pairwise_distances - +from sklearn.neighbors import NearestNeighbors +from sklearn.utils._testing import assert_array_equal n_clusters = 3 X = generate_clustered_data(n_clusters=n_clusters) diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py index 3e4aa816b79c0..1f985fae05dc3 100644 --- a/sklearn/cluster/tests/test_feature_agglomeration.py +++ 
b/sklearn/cluster/tests/test_feature_agglomeration.py @@ -3,11 +3,11 @@ """ # Authors: Sergul Aydore 2017 import numpy as np - from numpy.testing import assert_array_equal + from sklearn.cluster import FeatureAgglomeration -from sklearn.utils._testing import assert_array_almost_equal from sklearn.datasets import make_blobs +from sklearn.utils._testing import assert_array_almost_equal def test_feature_agglomeration(): diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index c6607779f80fc..a478dc3ee6544 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -6,48 +6,48 @@ # Matteo Visconti di Oleggio Castello 2014 # License: BSD 3 clause import itertools -from tempfile import mkdtemp import shutil -import pytest from functools import partial +from tempfile import mkdtemp import numpy as np +import pytest from scipy import sparse from scipy.cluster import hierarchy from scipy.sparse.csgraph import connected_components -from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS -from sklearn.utils._testing import assert_almost_equal, create_memmap_backed_data -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings - -from sklearn.cluster import ward_tree -from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration +from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration, ward_tree from sklearn.cluster._agglomerative import ( - _hc_cut, _TREE_BUILDERS, - linkage_tree, _fix_connectivity, + _hc_cut, + linkage_tree, +) +from sklearn.cluster._hierarchical_fast import ( + average_merge, + max_merge, + mst_linkage_core, ) +from sklearn.datasets import make_circles, make_moons from sklearn.feature_extraction.image import grid_to_graph from sklearn.metrics import DistanceMetric +from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score from sklearn.metrics.pairwise import ( PAIRED_DISTANCES, cosine_distances, manhattan_distances, pairwise_distances, ) -from sklearn.metrics.cluster import normalized_mutual_info_score +from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS from sklearn.neighbors import kneighbors_graph -from sklearn.cluster._hierarchical_fast import ( - average_merge, - max_merge, - mst_linkage_core, -) from sklearn.utils._fast_dict import IntFloatDict -from sklearn.utils._testing import assert_array_equal -from sklearn.datasets import make_moons, make_circles +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + create_memmap_backed_data, + ignore_warnings, +) def test_linkage_misc(): diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 93de26c26c320..cab5d9b03512f 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -1,36 +1,31 @@ """Testing for K-means""" import re import sys +from io import StringIO import numpy as np -from scipy import sparse as sp - import pytest +from scipy import sparse as sp -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils.fixes import threadpool_limits from sklearn.base import clone +from sklearn.cluster import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus +from sklearn.cluster._k_means_common import ( + 
_euclidean_dense_dense_wrapper, + _euclidean_sparse_dense_wrapper, + _inertia_dense, + _inertia_sparse, + _is_same_clustering, + _relocate_empty_clusters_dense, + _relocate_empty_clusters_sparse, +) +from sklearn.cluster._kmeans import _labels_inertia, _mini_batch_step +from sklearn.datasets import make_blobs from sklearn.exceptions import ConvergenceWarning - -from sklearn.utils.extmath import row_norms -from sklearn.metrics import pairwise_distances -from sklearn.metrics import pairwise_distances_argmin +from sklearn.metrics import pairwise_distances, pairwise_distances_argmin from sklearn.metrics.cluster import v_measure_score -from sklearn.cluster import KMeans, k_means, kmeans_plusplus -from sklearn.cluster import MiniBatchKMeans -from sklearn.cluster._kmeans import _labels_inertia -from sklearn.cluster._kmeans import _mini_batch_step -from sklearn.cluster._k_means_common import _relocate_empty_clusters_dense -from sklearn.cluster._k_means_common import _relocate_empty_clusters_sparse -from sklearn.cluster._k_means_common import _euclidean_dense_dense_wrapper -from sklearn.cluster._k_means_common import _euclidean_sparse_dense_wrapper -from sklearn.cluster._k_means_common import _inertia_dense -from sklearn.cluster._k_means_common import _inertia_sparse -from sklearn.cluster._k_means_common import _is_same_clustering -from sklearn.datasets import make_blobs -from io import StringIO - +from sklearn.utils._testing import assert_allclose, assert_array_equal +from sklearn.utils.extmath import row_norms +from sklearn.utils.fixes import threadpool_limits # non centered, sparse centers to check the centers = np.array( diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index cdd1134156173..f04e4ab7e23ef 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -3,23 +3,20 @@ """ -import numpy as np import warnings -import pytest +import numpy as np +import pytest from scipy import sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose - -from sklearn.cluster import MeanShift -from sklearn.cluster import mean_shift -from sklearn.cluster import estimate_bandwidth -from sklearn.cluster import get_bin_seeds +from sklearn.cluster import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift from sklearn.datasets import make_blobs from sklearn.metrics import v_measure_score - +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) n_clusters = 3 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 6de9e9c656e22..66c7f127cd00a 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -1,24 +1,21 @@ # Authors: Shane Grigsby # Adrin Jalali # License: BSD 3 clause +import warnings + import numpy as np import pytest from scipy import sparse -import warnings -from sklearn.datasets import make_blobs -from sklearn.cluster import OPTICS +from sklearn.cluster import DBSCAN, OPTICS from sklearn.cluster._optics import _extend_region, _extract_xi_labels -from sklearn.exceptions import DataConversionWarning +from sklearn.cluster.tests.common import generate_clustered_data +from sklearn.datasets import make_blobs +from sklearn.exceptions import DataConversionWarning, EfficiencyWarning from sklearn.metrics.cluster 
import contingency_matrix from sklearn.metrics.pairwise import pairwise_distances -from sklearn.cluster import DBSCAN from sklearn.utils import shuffle -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.exceptions import EfficiencyWarning -from sklearn.cluster.tests.common import generate_clustered_data - +from sklearn.utils._testing import assert_allclose, assert_array_equal rng = np.random.RandomState(0) n_points_per_cluster = 10 diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 19e92101ef7d4..29c6a01279bc2 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -1,25 +1,21 @@ """Testing for Spectral Clustering methods""" +import pickle import re import numpy as np +import pytest from scipy import sparse from scipy.linalg import LinAlgError -import pytest - -import pickle - -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_equal - from sklearn.cluster import SpectralClustering, spectral_clustering -from sklearn.cluster._spectral import discretize, cluster_qr +from sklearn.cluster._spectral import cluster_qr, discretize +from sklearn.datasets import make_blobs from sklearn.feature_extraction import img_to_graph -from sklearn.metrics import pairwise_distances -from sklearn.metrics import adjusted_rand_score +from sklearn.metrics import adjusted_rand_score, pairwise_distances from sklearn.metrics.pairwise import kernel_metrics, rbf_kernel from sklearn.neighbors import NearestNeighbors -from sklearn.datasets import make_blobs +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_array_equal try: from pyamg import smoothed_aggregation_solver # noqa diff --git a/sklearn/compose/__init__.py b/sklearn/compose/__init__.py index 8be8d17040e82..7b137cdf9e07f 100644 --- a/sklearn/compose/__init__.py +++ b/sklearn/compose/__init__.py @@ -7,12 +7,11 @@ from ._column_transformer import ( ColumnTransformer, - make_column_transformer, make_column_selector, + make_column_transformer, ) from ._target import TransformedTargetRegressor - __all__ = [ "ColumnTransformer", "make_column_transformer", diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 15f1424498856..b376b2f8fbc29 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -3,28 +3,27 @@ to work with heterogeneous data and to apply different transformers to different columns. 
""" +from collections import Counter + # Author: Andreas Mueller # Joris Van den Bossche # License: BSD from itertools import chain -from collections import Counter import numpy as np from scipy import sparse + from joblib import Parallel -from ..base import clone, TransformerMixin -from ..utils._estimator_html_repr import _VisualBlock -from ..pipeline import _fit_transform_one, _transform_one, _name_estimators +from ..base import TransformerMixin, clone +from ..pipeline import _fit_transform_one, _name_estimators, _transform_one from ..preprocessing import FunctionTransformer -from ..utils import Bunch -from ..utils import _safe_indexing -from ..utils import _get_column_indices +from ..utils import Bunch, _get_column_indices, _safe_indexing +from ..utils._estimator_html_repr import _VisualBlock from ..utils.deprecation import deprecated -from ..utils.metaestimators import _BaseComposition -from ..utils.validation import check_array, check_is_fitted, _check_feature_names_in from ..utils.fixes import delayed - +from ..utils.metaestimators import _BaseComposition +from ..utils.validation import _check_feature_names_in, check_array, check_is_fitted __all__ = ["ColumnTransformer", "make_column_transformer", "make_column_selector"] diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index 74a826f031a83..7c5cb40a3a851 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -7,11 +7,11 @@ import numpy as np from ..base import BaseEstimator, RegressorMixin, clone -from ..utils.validation import check_is_fitted -from ..utils._tags import _safe_tags -from ..utils import check_array, _safe_indexing -from ..preprocessing import FunctionTransformer from ..exceptions import NotFittedError +from ..preprocessing import FunctionTransformer +from ..utils import _safe_indexing, check_array +from ..utils._tags import _safe_tags +from ..utils.validation import check_is_fitted __all__ = ["TransformedTargetRegressor"] diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 7b2c0b91cccdf..a8861a23bdbd0 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -1,28 +1,33 @@ """ Test the ColumnTransformer. 
""" -import re import pickle +import re import numpy as np -from scipy import sparse import pytest - from numpy.testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_almost_equal +from scipy import sparse from sklearn.base import BaseEstimator from sklearn.compose import ( ColumnTransformer, - make_column_transformer, make_column_selector, + make_column_transformer, ) from sklearn.exceptions import NotFittedError -from sklearn.preprocessing import FunctionTransformer -from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder from sklearn.feature_extraction import DictVectorizer +from sklearn.preprocessing import ( + FunctionTransformer, + Normalizer, + OneHotEncoder, + StandardScaler, +) +from sklearn.utils._testing import ( + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_equal, +) class Trans(BaseEstimator): diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py index f0d63c00c2772..53242b7e0277b 100644 --- a/sklearn/compose/tests/test_target.py +++ b/sklearn/compose/tests/test_target.py @@ -1,25 +1,14 @@ import numpy as np import pytest -from sklearn.base import clone -from sklearn.base import BaseEstimator -from sklearn.base import TransformerMixin - -from sklearn.dummy import DummyRegressor - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_no_warnings - -from sklearn.preprocessing import FunctionTransformer -from sklearn.preprocessing import StandardScaler - -from sklearn.pipeline import Pipeline - -from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit - from sklearn import datasets - +from sklearn.base import BaseEstimator, TransformerMixin, clone from sklearn.compose import TransformedTargetRegressor +from sklearn.dummy import DummyRegressor +from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import FunctionTransformer, StandardScaler +from sklearn.utils._testing import assert_allclose, assert_no_warnings friedman = datasets.make_friedman1(random_state=0) diff --git a/sklearn/conftest.py b/sklearn/conftest.py index 27ac720cbfe2e..ca5d4f2ade23d 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -1,26 +1,27 @@ -from os import environ -from functools import wraps import platform import sys +from functools import wraps +from os import environ -import pytest import numpy as np -from threadpoolctl import threadpool_limits +import pytest from _pytest.doctest import DoctestItem +from threadpoolctl import threadpool_limits +from sklearn._min_dependencies import PYTEST_MIN_VERSION +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_20newsgroups_vectorized, + fetch_california_housing, + fetch_covtype, + fetch_kddcup99, + fetch_olivetti_faces, + fetch_rcv1, +) +from sklearn.tests import random_seed from sklearn.utils import _IS_32BIT from sklearn.utils._openmp_helpers import _openmp_effective_n_threads -from sklearn._min_dependencies import PYTEST_MIN_VERSION from sklearn.utils.fixes import parse_version -from sklearn.datasets import fetch_20newsgroups -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.datasets import fetch_california_housing -from sklearn.datasets import fetch_covtype -from sklearn.datasets import fetch_kddcup99 -from sklearn.datasets import fetch_olivetti_faces 
-from sklearn.datasets import fetch_rcv1 -from sklearn.tests import random_seed - if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): raise ImportError( diff --git a/sklearn/covariance/__init__.py b/sklearn/covariance/__init__.py index 011fde3647145..8fcf8c68444e5 100644 --- a/sklearn/covariance/__init__.py +++ b/sklearn/covariance/__init__.py @@ -6,24 +6,23 @@ Models. """ +from ._elliptic_envelope import EllipticEnvelope from ._empirical_covariance import ( - empirical_covariance, EmpiricalCovariance, + empirical_covariance, log_likelihood, ) +from ._graph_lasso import GraphicalLasso, GraphicalLassoCV, graphical_lasso +from ._robust_covariance import MinCovDet, fast_mcd from ._shrunk_covariance import ( - shrunk_covariance, + OAS, + LedoitWolf, ShrunkCovariance, ledoit_wolf, ledoit_wolf_shrinkage, - LedoitWolf, oas, - OAS, + shrunk_covariance, ) -from ._robust_covariance import fast_mcd, MinCovDet -from ._graph_lasso import graphical_lasso, GraphicalLasso, GraphicalLassoCV -from ._elliptic_envelope import EllipticEnvelope - __all__ = [ "EllipticEnvelope", diff --git a/sklearn/covariance/_elliptic_envelope.py b/sklearn/covariance/_elliptic_envelope.py index 31f76fc30ca30..412f2e57e39ac 100644 --- a/sklearn/covariance/_elliptic_envelope.py +++ b/sklearn/covariance/_elliptic_envelope.py @@ -3,10 +3,11 @@ # License: BSD 3 clause import numpy as np -from . import MinCovDet -from ..utils.validation import check_is_fitted -from ..metrics import accuracy_score + from ..base import OutlierMixin +from ..metrics import accuracy_score +from ..utils.validation import check_is_fitted +from ._robust_covariance import MinCovDet class EllipticEnvelope(OutlierMixin, MinCovDet): diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py index 4362a14f04f6e..a1628e6a581f5 100644 --- a/sklearn/covariance/_empirical_covariance.py +++ b/sklearn/covariance/_empirical_covariance.py @@ -11,14 +11,15 @@ # avoid division truncation import warnings + import numpy as np from scipy import linalg from .. import config_context from ..base import BaseEstimator +from ..metrics.pairwise import pairwise_distances from ..utils import check_array from ..utils.extmath import fast_logdet -from ..metrics.pairwise import pairwise_distances def log_likelihood(emp_cov, precision): diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py index de56faa8d70e1..14a8018f358c8 100644 --- a/sklearn/covariance/_graph_lasso.py +++ b/sklearn/covariance/_graph_lasso.py @@ -2,28 +2,29 @@ estimator. """ +import operator +import sys +import time + # Author: Gael Varoquaux # License: BSD 3 clause # Copyright: INRIA import warnings -import operator -import sys -import time import numpy as np from scipy import linalg -from joblib import Parallel -from . import empirical_covariance, EmpiricalCovariance, log_likelihood +from joblib import Parallel from ..exceptions import ConvergenceWarning -from ..utils.validation import _is_arraylike_not_scalar, check_random_state -from ..utils.fixes import delayed # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast' from ..linear_model import _cd_fast as cd_fast # type: ignore from ..linear_model import lars_path_gram from ..model_selection import check_cv, cross_val_score +from ..utils.fixes import delayed +from ..utils.validation import _is_arraylike_not_scalar, check_random_state +from . 
import EmpiricalCovariance, empirical_covariance, log_likelihood # Helper functions to compute the objective and dual objective functions diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py index 2122dd8075a1d..2133b8d673bc8 100644 --- a/sklearn/covariance/_robust_covariance.py +++ b/sklearn/covariance/_robust_covariance.py @@ -8,15 +8,16 @@ # # License: BSD 3 clause -import warnings import numbers +import warnings + import numpy as np from scipy import linalg from scipy.stats import chi2 -from . import empirical_covariance, EmpiricalCovariance +from ..utils import check_array, check_random_state from ..utils.extmath import fast_logdet -from ..utils import check_random_state, check_array +from ._empirical_covariance import EmpiricalCovariance, empirical_covariance # Minimum Covariance Determinant diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index 64fce5b1db6f6..539c746e28ad7 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -14,12 +14,12 @@ # avoid division truncation import warnings + import numpy as np -from . import empirical_covariance, EmpiricalCovariance from .._config import config_context from ..utils import check_array - +from . import EmpiricalCovariance, empirical_covariance # ShrunkCovariance estimator diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index 6a9031d0fcb36..07b144e631f55 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -7,21 +7,22 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal - from sklearn import datasets from sklearn.covariance import ( - empirical_covariance, + OAS, EmpiricalCovariance, - ShrunkCovariance, - shrunk_covariance, LedoitWolf, + ShrunkCovariance, + empirical_covariance, ledoit_wolf, ledoit_wolf_shrinkage, - OAS, oas, + shrunk_covariance, +) +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, ) X, _ = datasets.load_diabetes(return_X_y=True) diff --git a/sklearn/covariance/tests/test_elliptic_envelope.py b/sklearn/covariance/tests/test_elliptic_envelope.py index 90c059602bdae..f547e09a7f0d6 100644 --- a/sklearn/covariance/tests/test_elliptic_envelope.py +++ b/sklearn/covariance/tests/test_elliptic_envelope.py @@ -6,10 +6,12 @@ import pytest from sklearn.covariance import EllipticEnvelope -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal from sklearn.exceptions import NotFittedError +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def test_elliptic_envelope(): diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py index dc099deac8fe3..fbf894259197d 100644 --- a/sklearn/covariance/tests/test_graphical_lasso.py +++ b/sklearn/covariance/tests/test_graphical_lasso.py @@ -1,26 +1,27 @@ """ Test the graphical_lasso module. 
""" import sys -import pytest +from io import StringIO import numpy as np -from scipy import linalg - +import pytest from numpy.testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils._testing import _convert_container +from scipy import linalg +from sklearn import datasets from sklearn.covariance import ( - graphical_lasso, GraphicalLasso, GraphicalLassoCV, empirical_covariance, + graphical_lasso, ) from sklearn.datasets import make_sparse_spd_matrix -from io import StringIO from sklearn.utils import check_random_state -from sklearn import datasets +from sklearn.utils._testing import ( + _convert_container, + assert_array_almost_equal, + assert_array_less, +) def test_graphical_lasso(random_state=0): diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index 9bb93328b17a2..0b2a2bbbff815 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -9,11 +9,9 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_array_almost_equal - from sklearn import datasets -from sklearn.covariance import empirical_covariance, MinCovDet -from sklearn.covariance import fast_mcd +from sklearn.covariance import MinCovDet, empirical_covariance, fast_mcd +from sklearn.utils._testing import assert_array_almost_equal X = datasets.load_iris().data X_1d = X[:, 0] diff --git a/sklearn/cross_decomposition/__init__.py b/sklearn/cross_decomposition/__init__.py index ec2f5fb3049af..47b78783caf9c 100644 --- a/sklearn/cross_decomposition/__init__.py +++ b/sklearn/cross_decomposition/__init__.py @@ -1,3 +1,3 @@ -from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA +from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"] diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index 8a804142e13bb..16ba9b2d38dce 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -12,15 +12,18 @@ import numpy as np from scipy.linalg import svd -from ..base import BaseEstimator, RegressorMixin, TransformerMixin -from ..base import MultiOutputMixin -from ..base import _ClassNamePrefixFeaturesOutMixin -from ..utils import check_array, check_scalar, check_consistent_length -from ..utils.fixes import sp_version -from ..utils.fixes import parse_version -from ..utils.extmath import svd_flip -from ..utils.validation import check_is_fitted, FLOAT_DTYPES +from ..base import ( + BaseEstimator, + MultiOutputMixin, + RegressorMixin, + TransformerMixin, + _ClassNamePrefixFeaturesOutMixin, +) from ..exceptions import ConvergenceWarning +from ..utils import check_array, check_consistent_length, check_scalar +from ..utils.extmath import svd_flip +from ..utils.fixes import parse_version, sp_version +from ..utils.validation import FLOAT_DTYPES, check_is_fitted __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"] diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 57e8a2c20abc7..df37458fc1150 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -1,21 +1,20 @@ -import pytest import warnings + import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose +import pytest +from numpy.testing import 
assert_allclose, assert_array_almost_equal, assert_array_equal -from sklearn.datasets import load_linnerud +from sklearn.cross_decomposition import CCA, PLSSVD, PLSCanonical, PLSRegression from sklearn.cross_decomposition._pls import ( _center_scale_xy, _get_first_singular_vectors_power_method, _get_first_singular_vectors_svd, _svd_flip_1d, ) -from sklearn.cross_decomposition import CCA -from sklearn.cross_decomposition import PLSSVD, PLSRegression, PLSCanonical -from sklearn.datasets import make_regression +from sklearn.datasets import load_linnerud, make_regression +from sklearn.exceptions import ConvergenceWarning from sklearn.utils import check_random_state from sklearn.utils.extmath import svd_flip -from sklearn.exceptions import ConvergenceWarning def assert_matrix_orthogonal(M): diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 42f7b2f12ac0e..9afb74e677394 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -3,53 +3,56 @@ including methods to load and fetch popular reference datasets. It also features some artificial data generators. """ -from ._base import load_breast_cancer -from ._base import load_boston -from ._base import load_diabetes -from ._base import load_digits -from ._base import load_files -from ._base import load_iris -from ._base import load_linnerud -from ._base import load_sample_images -from ._base import load_sample_image -from ._base import load_wine -from ._base import get_data_home -from ._base import clear_data_home +from ._base import ( + clear_data_home, + get_data_home, + load_boston, + load_breast_cancer, + load_diabetes, + load_digits, + load_files, + load_iris, + load_linnerud, + load_sample_image, + load_sample_images, + load_wine, +) +from ._california_housing import fetch_california_housing from ._covtype import fetch_covtype from ._kddcup99 import fetch_kddcup99 -from ._lfw import fetch_lfw_pairs -from ._lfw import fetch_lfw_people -from ._twenty_newsgroups import fetch_20newsgroups -from ._twenty_newsgroups import fetch_20newsgroups_vectorized -from ._openml import fetch_openml -from ._samples_generator import make_classification -from ._samples_generator import make_multilabel_classification -from ._samples_generator import make_hastie_10_2 -from ._samples_generator import make_regression -from ._samples_generator import make_blobs -from ._samples_generator import make_moons -from ._samples_generator import make_circles -from ._samples_generator import make_friedman1 -from ._samples_generator import make_friedman2 -from ._samples_generator import make_friedman3 -from ._samples_generator import make_low_rank_matrix -from ._samples_generator import make_sparse_coded_signal -from ._samples_generator import make_sparse_uncorrelated -from ._samples_generator import make_spd_matrix -from ._samples_generator import make_swiss_roll -from ._samples_generator import make_s_curve -from ._samples_generator import make_sparse_spd_matrix -from ._samples_generator import make_gaussian_quantiles -from ._samples_generator import make_biclusters -from ._samples_generator import make_checkerboard -from ._svmlight_format_io import load_svmlight_file -from ._svmlight_format_io import load_svmlight_files -from ._svmlight_format_io import dump_svmlight_file +from ._lfw import fetch_lfw_pairs, fetch_lfw_people from ._olivetti_faces import fetch_olivetti_faces -from ._species_distributions import fetch_species_distributions -from ._california_housing import fetch_california_housing +from ._openml import fetch_openml 
from ._rcv1 import fetch_rcv1 - +from ._samples_generator import ( + make_biclusters, + make_blobs, + make_checkerboard, + make_circles, + make_classification, + make_friedman1, + make_friedman2, + make_friedman3, + make_gaussian_quantiles, + make_hastie_10_2, + make_low_rank_matrix, + make_moons, + make_multilabel_classification, + make_regression, + make_s_curve, + make_sparse_coded_signal, + make_sparse_spd_matrix, + make_sparse_uncorrelated, + make_spd_matrix, + make_swiss_roll, +) +from ._species_distributions import fetch_species_distributions +from ._svmlight_format_io import ( + dump_svmlight_file, + load_svmlight_file, + load_svmlight_files, +) +from ._twenty_newsgroups import fetch_20newsgroups, fetch_20newsgroups_vectorized __all__ = [ "clear_data_home", diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py index b5603853491a2..ed30dde25b32c 100644 --- a/sklearn/datasets/_arff_parser.py +++ b/sklearn/datasets/_arff_parser.py @@ -9,11 +9,7 @@ from ..externals import _arff from ..externals._arff import ArffSparseDataType -from ..utils import ( - _chunk_generator, - check_pandas_support, - get_chunk_n_rows, -) +from ..utils import _chunk_generator, check_pandas_support, get_chunk_n_rows def _split_sparse_columns( diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index 367816fa4a467..4b545f2f84985 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -7,25 +7,22 @@ # 2010 Olivier Grisel # License: BSD 3 clause import csv -import hashlib import gzip +import hashlib +import os import shutil from collections import namedtuple -import os +from importlib import resources from os import environ, listdir, makedirs from os.path import expanduser, isdir, join, splitext -from importlib import resources from pathlib import Path - -from ..preprocessing import scale -from ..utils import Bunch -from ..utils import check_random_state -from ..utils import check_pandas_support -from ..utils.deprecation import deprecated +from urllib.request import urlretrieve import numpy as np -from urllib.request import urlretrieve +from ..preprocessing import scale +from ..utils import Bunch, check_pandas_support, check_random_state +from ..utils.deprecation import deprecated DATA_MODULE = "sklearn.datasets.data" DESCR_MODULE = "sklearn.datasets.descr" diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py index f3f7d0e57c502..a2eeae69049e5 100644 --- a/sklearn/datasets/_california_housing.py +++ b/sklearn/datasets/_california_housing.py @@ -21,23 +21,24 @@ # Authors: Peter Prettenhofer # License: BSD 3 clause -from os.path import exists -from os import makedirs, remove +import logging import tarfile +from os import makedirs, remove +from os.path import exists import numpy as np -import logging import joblib -from . import get_data_home -from ._base import _convert_data_dataframe -from ._base import _fetch_remote -from ._base import _pkl_filepath -from ._base import RemoteFileMetadata -from ._base import load_descr from ..utils import Bunch - +from . 
import get_data_home +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + _pkl_filepath, + load_descr, +) # The original data can be found at: # https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py index b43ea24141eed..5c098060da4bb 100644 --- a/sklearn/datasets/_covtype.py +++ b/sklearn/datasets/_covtype.py @@ -14,24 +14,25 @@ # Peter Prettenhofer # License: BSD 3 clause -from gzip import GzipFile import logging -from os.path import exists, join import os +from gzip import GzipFile +from os.path import exists, join from tempfile import TemporaryDirectory import numpy as np + import joblib +from ..utils import Bunch, check_random_state from . import get_data_home -from ._base import _convert_data_dataframe -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import load_descr -from ..utils import Bunch -from ._base import _pkl_filepath -from ..utils import check_random_state - +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + _pkl_filepath, + load_descr, +) # The original data can be found in: # https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py index b698d299b7c8d..a2efb8569b944 100644 --- a/sklearn/datasets/_kddcup99.py +++ b/sklearn/datasets/_kddcup99.py @@ -9,23 +9,24 @@ """ import errno -from gzip import GzipFile import logging import os +from gzip import GzipFile from os.path import exists, join import numpy as np + import joblib -from ._base import _fetch_remote -from ._base import _convert_data_dataframe -from . import get_data_home -from ._base import RemoteFileMetadata -from ._base import load_descr -from ..utils import Bunch -from ..utils import check_random_state +from ..utils import Bunch, check_random_state from ..utils import shuffle as shuffle_method - +from . import get_data_home +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + load_descr, +) # The original data can be found at: # https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index dc1267af59f96..00b5bc35d1eb5 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -8,21 +8,16 @@ # Copyright (c) 2011 Olivier Grisel # License: BSD 3 clause -from os import listdir, makedirs, remove -from os.path import join, exists, isdir - import logging +from os import listdir, makedirs, remove +from os.path import exists, isdir, join import numpy as np + from joblib import Memory -from ._base import ( - get_data_home, - _fetch_remote, - RemoteFileMetadata, - load_descr, -) from ..utils import Bunch +from ._base import RemoteFileMetadata, _fetch_remote, get_data_home, load_descr logger = logging.getLogger(__name__) diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py index 296a3868081d9..fb6e78300ebe9 100644 --- a/sklearn/datasets/_olivetti_faces.py +++ b/sklearn/datasets/_olivetti_faces.py @@ -13,19 +13,17 @@ # Copyright (c) 2011 David Warde-Farley # License: BSD 3 clause -from os.path import exists from os import makedirs, remove +from os.path import exists import numpy as np from scipy.io import loadmat + import joblib +from ..utils import Bunch, check_random_state from . 
import get_data_home -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import _pkl_filepath -from ._base import load_descr -from ..utils import check_random_state, Bunch +from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr # The original data can be found at: # https://cs.nyu.edu/~roweis/data/olivettifaces.mat diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py index ff98533cc8624..2e730ff5f87f8 100644 --- a/sklearn/datasets/_openml.py +++ b/sklearn/datasets/_openml.py @@ -7,18 +7,18 @@ from contextlib import closing from functools import wraps from os.path import join -from typing import Callable, Optional, Dict, Tuple, List, Any, Union from tempfile import TemporaryDirectory +from typing import Any, Callable, Dict, List, Optional, Tuple, Union from urllib.error import HTTPError, URLError -from urllib.request import urlopen, Request +from urllib.request import Request, urlopen from warnings import warn import numpy as np +from ..utils import check_pandas_support # noqa +from ..utils import Bunch from . import get_data_home from ._arff_parser import load_arff_from_gzip_file -from ..utils import Bunch -from ..utils import check_pandas_support # noqa __all__ = ["fetch_openml"] diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py index cca30afefff34..20618ff5f944a 100644 --- a/sklearn/datasets/_rcv1.py +++ b/sklearn/datasets/_rcv1.py @@ -9,24 +9,20 @@ # License: BSD 3 clause import logging - -from os import remove, makedirs -from os.path import exists, join from gzip import GzipFile +from os import makedirs, remove +from os.path import exists, join import numpy as np import scipy.sparse as sp + import joblib +from ..utils import Bunch +from ..utils import shuffle as shuffle_ from . import get_data_home -from ._base import _pkl_filepath -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import load_descr +from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr from ._svmlight_format_io import load_svmlight_files -from ..utils import shuffle as shuffle_ -from ..utils import Bunch - # The original vectorized data can be found at: # http://www.ai.mit.edu/projects/jmlr/papers/volume5/lewis04a/a13-vector-files/lyrl2004_vectors_test_pt0.dat.gz diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index acc7a6e43b06c..71e39799daeae 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -6,14 +6,14 @@ # G. Louppe, J. Nothman # License: BSD 3 clause -import numbers import array +import numbers import warnings from collections.abc import Iterable import numpy as np -from scipy import linalg import scipy.sparse as sp +from scipy import linalg from ..preprocessing import MultiLabelBinarizer from ..utils import check_array, check_random_state diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py index 9afc6e08cd6cb..6406d09ce5cd4 100644 --- a/sklearn/datasets/_species_distributions.py +++ b/sklearn/datasets/_species_distributions.py @@ -37,20 +37,18 @@ # # License: BSD 3 clause +import logging from io import BytesIO from os import makedirs, remove from os.path import exists -import logging import numpy as np import joblib -from . import get_data_home -from ._base import _fetch_remote -from ._base import RemoteFileMetadata from ..utils import Bunch -from ._base import _pkl_filepath +from . 
import get_data_home +from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath # The original data can be found at: # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip diff --git a/sklearn/datasets/_svmlight_format_fast.pyx b/sklearn/datasets/_svmlight_format_fast.pyx index bf5554714c199..845d948de9b05 100644 --- a/sklearn/datasets/_svmlight_format_fast.pyx +++ b/sklearn/datasets/_svmlight_format_fast.pyx @@ -6,8 +6,9 @@ # License: BSD 3 clause import array -from cpython cimport array + cimport cython +from cpython cimport array from libc.string cimport strchr import numpy as np diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py index a3d1a6e587457..47aec0dc0e9d3 100644 --- a/sklearn/datasets/_svmlight_format_io.py +++ b/sklearn/datasets/_svmlight_format_io.py @@ -15,16 +15,15 @@ # Olivier Grisel # License: BSD 3 clause -from contextlib import closing import io import os.path +from contextlib import closing import numpy as np import scipy.sparse as sp from .. import __version__ - -from ..utils import check_array, IS_PYPY +from ..utils import IS_PYPY, check_array if not IS_PYPY: from ._svmlight_format_fast import _load_svmlight_file diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py index 22bea7e59482d..7453e73646719 100644 --- a/sklearn/datasets/_twenty_newsgroups.py +++ b/sklearn/datasets/_twenty_newsgroups.py @@ -24,28 +24,30 @@ # Copyright (c) 2011 Olivier Grisel # License: BSD 3 clause -import os +import codecs import logging -import tarfile +import os import pickle -import shutil import re -import codecs +import shutil +import tarfile import numpy as np import scipy.sparse as sp + import joblib -from . import get_data_home -from . import load_files -from ._base import _convert_data_dataframe -from ._base import _pkl_filepath -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import load_descr -from ..feature_extraction.text import CountVectorizer from .. import preprocessing -from ..utils import check_random_state, Bunch +from ..feature_extraction.text import CountVectorizer +from ..utils import Bunch, check_random_state +from . 
import get_data_home, load_files +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + _pkl_filepath, + load_descr, +) logger = logging.getLogger(__name__) diff --git a/sklearn/datasets/setup.py b/sklearn/datasets/setup.py index a75f14a083297..8d65328e69048 100644 --- a/sklearn/datasets/setup.py +++ b/sklearn/datasets/setup.py @@ -1,7 +1,8 @@ -import numpy import os import platform +import numpy + def configuration(parent_package="", top_path=None): from numpy.distutils.misc_util import Configuration diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py index ef1280f6218b1..c8ab1cd04ee6e 100644 --- a/sklearn/datasets/tests/conftest.py +++ b/sklearn/datasets/tests/conftest.py @@ -1,6 +1,7 @@ """ Network tests are only run, if data is already locally available, or if download is specifically requested by environment variable.""" import builtins + import pytest diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 4244dd7865945..d1d03fdca7c9a 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -4,16 +4,17 @@ from functools import partial from unittest.mock import patch -import pytest - import numpy as np +import pytest import scipy.sparse as sp -from sklearn.datasets.tests.test_common import check_as_frame -from sklearn.datasets.tests.test_common import check_pandas_dependency_message -from sklearn.datasets.tests.test_common import check_return_X_y -from sklearn.utils._testing import assert_allclose_dense_sparse +from sklearn.datasets.tests.test_common import ( + check_as_frame, + check_pandas_dependency_message, + check_return_X_y, +) from sklearn.preprocessing import normalize +from sklearn.utils._testing import assert_allclose_dense_sparse def test_20news(fetch_20newsgroups_fxt): diff --git a/sklearn/datasets/tests/test_arff_parser.py b/sklearn/datasets/tests/test_arff_parser.py index 3a06a3c338394..b92ade2e3617a 100644 --- a/sklearn/datasets/tests/test_arff_parser.py +++ b/sklearn/datasets/tests/test_arff_parser.py @@ -1,9 +1,6 @@ import pytest -from sklearn.datasets._arff_parser import ( - _post_process_frame, - load_arff_from_gzip_file, -) +from sklearn.datasets._arff_parser import _post_process_frame, load_arff_from_gzip_file @pytest.mark.parametrize( diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 2eeb2fc570094..270b985e10f87 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -2,33 +2,32 @@ import shutil import tempfile import warnings -from pickle import loads -from pickle import dumps from functools import partial from importlib import resources +from pickle import dumps, loads -import pytest import numpy as np -from sklearn.datasets import get_data_home -from sklearn.datasets import clear_data_home -from sklearn.datasets import load_files -from sklearn.datasets import load_sample_images -from sklearn.datasets import load_sample_image -from sklearn.datasets import load_digits -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_linnerud -from sklearn.datasets import load_iris -from sklearn.datasets import load_breast_cancer -from sklearn.datasets import load_boston -from sklearn.datasets import load_wine -from sklearn.datasets._base import ( - load_csv_data, - load_gzip_compressed_csv_data, +import pytest + +from sklearn.datasets import ( + clear_data_home, + get_data_home, + load_boston, + load_breast_cancer, + 
load_diabetes, + load_digits, + load_files, + load_iris, + load_linnerud, + load_sample_image, + load_sample_images, + load_wine, ) +from sklearn.datasets._base import load_csv_data, load_gzip_compressed_csv_data +from sklearn.datasets.tests.test_common import check_as_frame from sklearn.preprocessing import scale from sklearn.utils import Bunch from sklearn.utils._testing import SkipTest -from sklearn.datasets.tests.test_common import check_as_frame def _remove_dir(path): diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index 82a321e96a8d6..868d3d8084ecc 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -1,10 +1,11 @@ """Test the california_housing loader, if the data is available, or if specifically requested via environment variable (e.g. for travis cron job).""" +from functools import partial + import pytest from sklearn.datasets.tests.test_common import check_return_X_y -from functools import partial def test_fetch(fetch_california_housing_fxt): diff --git a/sklearn/datasets/tests/test_common.py b/sklearn/datasets/tests/test_common.py index 49155837be25b..5f53d4e659e51 100644 --- a/sklearn/datasets/tests/test_common.py +++ b/sklearn/datasets/tests/test_common.py @@ -2,8 +2,8 @@ import inspect import os -import pytest import numpy as np +import pytest import sklearn.datasets diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index bbdd395a847f4..72fe0fdd526f8 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -2,7 +2,9 @@ or if specifically requested via environment variable (e.g. for travis cron job).""" from functools import partial + import pytest + from sklearn.datasets.tests.test_common import check_return_X_y diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index b935da3a26add..965e1d46231e9 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -7,11 +7,14 @@ """ from functools import partial + import pytest -from sklearn.datasets.tests.test_common import check_as_frame -from sklearn.datasets.tests.test_common import check_pandas_dependency_message -from sklearn.datasets.tests.test_common import check_return_X_y +from sklearn.datasets.tests.test_common import ( + check_as_frame, + check_pandas_dependency_message, + check_return_X_y, +) @pytest.mark.parametrize("as_frame", [True, False]) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index fba3949befb1a..49a8ed1d73ded 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -8,19 +8,18 @@ joblib, successive runs will be fast (less than 200ms). 
""" -import random import os +import random import shutil import tempfile +from functools import partial + import numpy as np import pytest -from functools import partial -from sklearn.datasets import fetch_lfw_pairs -from sklearn.datasets import fetch_lfw_people -from sklearn.utils._testing import assert_array_equal +from sklearn.datasets import fetch_lfw_pairs, fetch_lfw_people from sklearn.datasets.tests.test_common import check_return_X_y - +from sklearn.utils._testing import assert_array_equal SCIKIT_LEARN_DATA = None SCIKIT_LEARN_EMPTY_DATA = None diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py index 7d11516b0426c..99b017bbac309 100644 --- a/sklearn/datasets/tests/test_olivetti_faces.py +++ b/sklearn/datasets/tests/test_olivetti_faces.py @@ -4,9 +4,8 @@ import numpy as np -from sklearn.utils import Bunch from sklearn.datasets.tests.test_common import check_return_X_y - +from sklearn.utils import Bunch from sklearn.utils._testing import assert_array_equal diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index 826a07783a6b0..3d0befcc9ca4c 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -9,11 +9,18 @@ from urllib.error import HTTPError import numpy as np -import scipy.sparse import pytest +import scipy.sparse import sklearn from sklearn import config_context +from sklearn.datasets import fetch_openml as fetch_openml_orig +from sklearn.datasets._openml import ( + _OPENML_PREFIX, + _get_local_path, + _open_openml_url, + _retry_with_clean_cache, +) from sklearn.utils import Bunch, check_pandas_support from sklearn.utils._testing import ( SkipTest, @@ -22,15 +29,6 @@ fails_if_pypy, ) -from sklearn.datasets import fetch_openml as fetch_openml_orig -from sklearn.datasets._openml import ( - _OPENML_PREFIX, - _open_openml_url, - _get_local_path, - _retry_with_clean_cache, -) - - OPENML_TEST_DATA_MODULE = "sklearn.datasets.tests.data.openml" # if True, urlopen will be monkey patched to only use local files test_offline = True diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index cdc9f02c010c5..a3718a8b76bb0 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -2,12 +2,13 @@ or if specifically requested via environment variable (e.g. 
for travis cron job).""" -import scipy.sparse as sp -import numpy as np from functools import partial + +import numpy as np +import scipy.sparse as sp + from sklearn.datasets.tests.test_common import check_return_X_y -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_almost_equal, assert_array_equal def test_fetch_rcv1(fetch_rcv1_fxt): diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index b464178906f04..4eeca65d7a6d9 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -6,30 +6,32 @@ import pytest import scipy.sparse as sp -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose - -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification -from sklearn.datasets import make_hastie_10_2 -from sklearn.datasets import make_regression -from sklearn.datasets import make_blobs -from sklearn.datasets import make_friedman1 -from sklearn.datasets import make_friedman2 -from sklearn.datasets import make_friedman3 -from sklearn.datasets import make_low_rank_matrix -from sklearn.datasets import make_moons -from sklearn.datasets import make_circles -from sklearn.datasets import make_sparse_coded_signal -from sklearn.datasets import make_sparse_uncorrelated -from sklearn.datasets import make_spd_matrix -from sklearn.datasets import make_swiss_roll -from sklearn.datasets import make_s_curve -from sklearn.datasets import make_biclusters -from sklearn.datasets import make_checkerboard - +from sklearn.datasets import ( + make_biclusters, + make_blobs, + make_checkerboard, + make_circles, + make_classification, + make_friedman1, + make_friedman2, + make_friedman3, + make_hastie_10_2, + make_low_rank_matrix, + make_moons, + make_multilabel_classification, + make_regression, + make_s_curve, + make_sparse_coded_signal, + make_sparse_uncorrelated, + make_spd_matrix, + make_swiss_roll, +) +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) from sklearn.utils.validation import assert_all_finite diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index 892b6d0d43ba6..a131972bb71dc 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -1,22 +1,22 @@ -from bz2 import BZ2File import gzip -from io import BytesIO -import numpy as np -import scipy.sparse as sp import os import shutil +from bz2 import BZ2File from importlib import resources +from io import BytesIO from tempfile import NamedTemporaryFile +import numpy as np import pytest - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import fails_if_pypy +import scipy.sparse as sp import sklearn -from sklearn.datasets import load_svmlight_file, load_svmlight_files, dump_svmlight_file - +from sklearn.datasets import dump_svmlight_file, load_svmlight_file, load_svmlight_files +from sklearn.utils._testing import ( + assert_array_almost_equal, + assert_array_equal, + fails_if_pypy, +) TEST_DATA_MODULE = "sklearn.datasets.tests.data" 
datafile = "svmlight_classification.txt" diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index c5f323d3c5d72..88851a0b3e5a4 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -5,29 +5,24 @@ """ -from ._nmf import ( - NMF, - MiniBatchNMF, - non_negative_factorization, -) -from ._pca import PCA -from ._incremental_pca import IncrementalPCA -from ._kernel_pca import KernelPCA -from ._sparse_pca import SparsePCA, MiniBatchSparsePCA -from ._truncated_svd import TruncatedSVD -from ._fastica import FastICA, fastica +from ..utils.extmath import randomized_svd from ._dict_learning import ( - dict_learning, - dict_learning_online, - sparse_encode, DictionaryLearning, MiniBatchDictionaryLearning, SparseCoder, + dict_learning, + dict_learning_online, + sparse_encode, ) from ._factor_analysis import FactorAnalysis -from ..utils.extmath import randomized_svd +from ._fastica import FastICA, fastica +from ._incremental_pca import IncrementalPCA +from ._kernel_pca import KernelPCA from ._lda import LatentDirichletAllocation - +from ._nmf import NMF, MiniBatchNMF, non_negative_factorization +from ._pca import PCA +from ._sparse_pca import MiniBatchSparsePCA, SparsePCA +from ._truncated_svd import TruncatedSVD __all__ = [ "DictionaryLearning", diff --git a/sklearn/decomposition/_base.py b/sklearn/decomposition/_base.py index 888fc3856d1b8..dd273af67ec63 100644 --- a/sklearn/decomposition/_base.py +++ b/sklearn/decomposition/_base.py @@ -8,12 +8,13 @@ # # License: BSD 3 clause +from abc import ABCMeta, abstractmethod + import numpy as np from scipy import linalg from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ..utils.validation import check_is_fitted -from abc import ABCMeta, abstractmethod class _BasePCA( diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index ad3db76bfd4b5..4a25838eb9c78 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -3,25 +3,29 @@ # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort # License: BSD 3 clause -import time -import sys import itertools +import sys +import time import warnings - from math import ceil import numpy as np from scipy import linalg + from joblib import Parallel, effective_n_jobs from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin -from ..utils import check_array, check_random_state, gen_even_slices, gen_batches -from ..utils import deprecated +from ..linear_model import Lars, Lasso, LassoLars, orthogonal_mp_gram +from ..utils import ( + check_array, + check_random_state, + deprecated, + gen_batches, + gen_even_slices, +) from ..utils.extmath import randomized_svd, row_norms, svd_flip -from ..utils.validation import check_is_fitted -from ..utils.validation import check_scalar from ..utils.fixes import delayed -from ..linear_model import Lasso, orthogonal_mp_gram, LassoLars, Lars +from ..utils.validation import check_is_fitted, check_scalar def _check_positive_coding(method, positive): diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py index 4b8eab3492ca8..473d22446c8d9 100644 --- a/sklearn/decomposition/_factor_analysis.py +++ b/sklearn/decomposition/_factor_analysis.py @@ -20,16 +20,16 @@ # License: BSD3 import warnings -from math import sqrt, log +from math import log, sqrt + import numpy as np from scipy import linalg - from ..base import BaseEstimator, 
TransformerMixin, _ClassNamePrefixFeaturesOutMixin +from ..exceptions import ConvergenceWarning from ..utils import check_random_state from ..utils.extmath import fast_logdet, randomized_svd, squared_norm from ..utils.validation import check_is_fitted -from ..exceptions import ConvergenceWarning class FactorAnalysis(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index 490a3323344d1..96da62d7a0606 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -16,8 +16,7 @@ from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ..exceptions import ConvergenceWarning - -from ..utils import check_array, as_float_array, check_random_state +from ..utils import as_float_array, check_array, check_random_state from ..utils.validation import check_is_fitted __all__ = ["fastica", "FastICA"] diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py index 589796a7c97f7..ad36088f8a3ae 100644 --- a/sklearn/decomposition/_incremental_pca.py +++ b/sklearn/decomposition/_incremental_pca.py @@ -7,9 +7,9 @@ import numpy as np from scipy import linalg, sparse -from ._base import _BasePCA from ..utils import gen_batches -from ..utils.extmath import svd_flip, _incremental_mean_and_var +from ..utils.extmath import _incremental_mean_and_var, svd_flip +from ._base import _BasePCA class IncrementalPCA(_BasePCA): diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py index 4e3ad720ae126..dd3dd4c6ea18a 100644 --- a/sklearn/decomposition/_kernel_pca.py +++ b/sklearn/decomposition/_kernel_pca.py @@ -4,23 +4,20 @@ # Sylvain Marie # License: BSD 3 clause -import numpy as np import numbers + +import numpy as np from scipy import linalg from scipy.sparse.linalg import eigsh -from ..utils._arpack import _init_arpack_v0 -from ..utils.extmath import svd_flip, _randomized_eigsh -from ..utils.validation import ( - check_is_fitted, - _check_psd_eigenvalues, - check_scalar, -) -from ..utils.deprecation import deprecated -from ..exceptions import NotFittedError from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin -from ..preprocessing import KernelCenterer +from ..exceptions import NotFittedError from ..metrics.pairwise import pairwise_kernels +from ..preprocessing import KernelCenterer +from ..utils._arpack import _init_arpack_v0 +from ..utils.deprecation import deprecated +from ..utils.extmath import _randomized_eigsh, svd_flip +from ..utils.validation import _check_psd_eigenvalues, check_is_fitted, check_scalar class KernelPCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py index 6db9d900566eb..8c3dd70f8c3bc 100644 --- a/sklearn/decomposition/_lda.py +++ b/sklearn/decomposition/_lda.py @@ -14,18 +14,17 @@ import numpy as np import scipy.sparse as sp from scipy.special import gammaln, logsumexp + from joblib import Parallel, effective_n_jobs from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ..utils import check_random_state, gen_batches, gen_even_slices -from ..utils.validation import check_non_negative -from ..utils.validation import check_is_fitted from ..utils.fixes import delayed - +from ..utils.validation import check_is_fitted, check_non_negative from ._online_lda_fast import ( - mean_change, _dirichlet_expectation_1d, 
_dirichlet_expectation_2d, + mean_change, ) EPS = np.finfo(float).eps diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 7623822ba5912..81a3a088cf791 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -6,25 +6,23 @@ # Tom Dupre la Tour # License: BSD 3 clause +import itertools import numbers -import numpy as np -import scipy.sparse as sp import time -import itertools import warnings from math import sqrt + +import numpy as np +import scipy.sparse as sp from scipy import linalg -from ._cdnmf_fast import _update_cdnmf_fast from .._config import config_context from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ..exceptions import ConvergenceWarning -from ..utils import check_random_state, check_array, gen_batches +from ..utils import check_array, check_random_state, gen_batches from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm -from ..utils.validation import ( - check_is_fitted, - check_non_negative, -) +from ..utils.validation import check_is_fitted, check_non_negative +from ._cdnmf_fast import _update_cdnmf_fast EPSILON = np.finfo(np.float32).eps diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py index 635e119ae445d..e4367640667ca 100644 --- a/sklearn/decomposition/_pca.py +++ b/sklearn/decomposition/_pca.py @@ -10,21 +10,20 @@ # # License: BSD 3 clause -from math import log, sqrt import numbers +from math import log, sqrt import numpy as np from scipy import linalg -from scipy.special import gammaln from scipy.sparse import issparse from scipy.sparse.linalg import svds +from scipy.special import gammaln -from ._base import _BasePCA from ..utils import check_random_state, check_scalar from ..utils._arpack import _init_arpack_v0 -from ..utils.extmath import fast_logdet, randomized_svd, svd_flip -from ..utils.extmath import stable_cumsum +from ..utils.extmath import fast_logdet, randomized_svd, stable_cumsum, svd_flip from ..utils.validation import check_is_fitted +from ._base import _BasePCA def _assess_dimension(spectrum, rank, n_samples): diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index a36bfbfd529d0..a8aa44ea488fe 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -6,10 +6,10 @@ import numpy as np +from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin +from ..linear_model import ridge_regression from ..utils import check_random_state from ..utils.validation import check_is_fitted -from ..linear_model import ridge_regression -from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ._dict_learning import dict_learning, dict_learning_online diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py index b8417543783d4..367ab9058b654 100644 --- a/sklearn/decomposition/_truncated_svd.py +++ b/sklearn/decomposition/_truncated_svd.py @@ -7,6 +7,7 @@ # License: 3-clause BSD. 
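Besides merging imports, isort normalizes section order, which is what most of the decomposition hunks above amount to: standard-library imports first, third-party packages next, first-party and relative imports last, with one blank line between sections and alphabetical order within each. The post-patch header of sklearn/decomposition/_pca.py, taken from its hunk above, shows the three sections:

    import numbers                  # standard library
    from math import log, sqrt

    import numpy as np              # third-party
    from scipy import linalg
    from scipy.sparse import issparse
    from scipy.sparse.linalg import svds
    from scipy.special import gammaln

    from ..utils import check_random_state, check_scalar  # first-party, relative
    from ..utils._arpack import _init_arpack_v0
    from ..utils.extmath import fast_logdet, randomized_svd, stable_cumsum, svd_flip
    from ..utils.validation import check_is_fitted
    from ._base import _BasePCA
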
from numbers import Integral + import numpy as np import scipy.sparse as sp from scipy.sparse.linalg import svds diff --git a/sklearn/decomposition/setup.py b/sklearn/decomposition/setup.py index 2937f282b755d..c32eeadffe9e1 100644 --- a/sklearn/decomposition/setup.py +++ b/sklearn/decomposition/setup.py @@ -1,4 +1,5 @@ import os + import numpy from numpy.distutils.misc_util import Configuration diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index b53148cddec57..0910edde825e1 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -1,34 +1,34 @@ -import pytest +import itertools import warnings +from functools import partial import numpy as np -from functools import partial -import itertools +import pytest from sklearn.base import clone - +from sklearn.decomposition import ( + DictionaryLearning, + MiniBatchDictionaryLearning, + SparseCoder, + dict_learning, + dict_learning_online, + sparse_encode, +) +from sklearn.decomposition._dict_learning import _update_dict from sklearn.exceptions import ConvergenceWarning - from sklearn.utils import check_array - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import TempMemmap - -from sklearn.decomposition import DictionaryLearning -from sklearn.decomposition import MiniBatchDictionaryLearning -from sklearn.decomposition import SparseCoder -from sklearn.decomposition import dict_learning -from sklearn.decomposition import dict_learning_online -from sklearn.decomposition import sparse_encode -from sklearn.utils.estimator_checks import check_transformer_data_not_an_array -from sklearn.utils.estimator_checks import check_transformer_general -from sklearn.utils.estimator_checks import check_transformers_unfitted - -from sklearn.decomposition._dict_learning import _update_dict - +from sklearn.utils._testing import ( + TempMemmap, + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.estimator_checks import ( + check_transformer_data_not_an_array, + check_transformer_general, + check_transformers_unfitted, +) rng_global = np.random.RandomState(0) n_samples, n_features = 10, 8 @@ -401,8 +401,8 @@ def test_dict_learning_online_positivity(positive_code, positive_dict): def test_dict_learning_online_verbosity(): # test verbosity for better coverage n_components = 5 - from io import StringIO import sys + from io import StringIO old_stdout = sys.stdout try: diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index 89ef433521e09..7ffac87f4f5a1 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -7,12 +7,14 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.exceptions import ConvergenceWarning from sklearn.decomposition import FactorAnalysis -from sklearn.utils._testing import ignore_warnings from sklearn.decomposition._factor_analysis import _ortho_rotation +from sklearn.exceptions import ConvergenceWarning +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + ignore_warnings, +) # 
Ignore warnings from switching to more power iterations in randomized_svd diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 082b7d68dee79..f4d90514be96c 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -2,18 +2,16 @@ Test the fastica algorithm. """ import itertools -import pytest import warnings import numpy as np +import pytest from scipy import stats -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose - -from sklearn.decomposition import FastICA, fastica, PCA +from sklearn.decomposition import PCA, FastICA, fastica from sklearn.decomposition._fastica import _gs_decorrelation from sklearn.exceptions import ConvergenceWarning +from sklearn.utils._testing import assert_allclose, assert_array_equal def center_and_norm(x, axis=-1): diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index f74d608c45b40..6db12280d21f1 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -1,17 +1,18 @@ """Tests for Incremental PCA.""" -import numpy as np -import pytest import warnings -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose_dense_sparse +import numpy as np +import pytest from numpy.testing import assert_array_equal +from scipy import sparse from sklearn import datasets from sklearn.decomposition import PCA, IncrementalPCA - -from scipy import sparse +from sklearn.utils._testing import ( + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, +) iris = datasets.load_iris() diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 167ce84dc9082..46127033c28dd 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -1,23 +1,22 @@ -import numpy as np -import scipy.sparse as sp -import pytest import warnings -from sklearn.utils._testing import ( - assert_array_almost_equal, - assert_array_equal, - assert_allclose, -) +import numpy as np +import pytest +import scipy.sparse as sp +from sklearn.datasets import make_blobs, make_circles from sklearn.decomposition import PCA, KernelPCA -from sklearn.datasets import make_circles -from sklearn.datasets import make_blobs from sklearn.exceptions import NotFittedError from sklearn.linear_model import Perceptron +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import GridSearchCV -from sklearn.metrics.pairwise import rbf_kernel +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) from sklearn.utils.validation import _check_psd_eigenvalues diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index 9f3df5b64a803..0453d56a7c135 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -3,24 +3,24 @@ from io import StringIO import numpy as np +import pytest import scipy.sparse as sp - from scipy import linalg -from sklearn.decomposition import NMF, MiniBatchNMF -from sklearn.decomposition import 
non_negative_factorization -from sklearn.decomposition import _nmf as nmf # For testing internals from scipy.sparse import csc_matrix -import pytest - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.extmath import squared_norm from sklearn.base import clone +from sklearn.decomposition import NMF, MiniBatchNMF +from sklearn.decomposition import _nmf as nmf # For testing internals +from sklearn.decomposition import non_negative_factorization from sklearn.exceptions import ConvergenceWarning +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.extmath import squared_norm @pytest.mark.parametrize( diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index e3ce951f7b6da..10c6049099be7 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -1,26 +1,25 @@ import sys +from io import StringIO import numpy as np +import pytest +from numpy.testing import assert_array_equal from scipy.linalg import block_diag from scipy.sparse import csr_matrix from scipy.special import psi -from numpy.testing import assert_array_equal - -import pytest from sklearn.decomposition import LatentDirichletAllocation from sklearn.decomposition._lda import ( _dirichlet_expectation_1d, _dirichlet_expectation_2d, ) - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import if_safe_multiprocessing_with_blas - from sklearn.exceptions import NotFittedError -from io import StringIO +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + if_safe_multiprocessing_with_blas, +) def _build_sparse_mtx(): diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index a7202335041ce..406661005f197 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -1,17 +1,15 @@ +import warnings + import numpy as np +import pytest import scipy as sp from numpy.testing import assert_array_equal -import pytest -import warnings - -from sklearn.utils._testing import assert_allclose - from sklearn import datasets -from sklearn.decomposition import PCA from sklearn.datasets import load_iris -from sklearn.decomposition._pca import _assess_dimension -from sklearn.decomposition._pca import _infer_dimension +from sklearn.decomposition import PCA +from sklearn.decomposition._pca import _assess_dimension, _infer_dimension +from sklearn.utils._testing import assert_allclose iris = datasets.load_iris() PCA_SOLVERS = ["full", "arpack", "randomized", "auto"] diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index db92ec582abdd..daff00be56687 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -2,17 +2,18 @@ # License: BSD 3 clause import sys -import pytest import numpy as np +import pytest from numpy.testing import assert_array_equal -from sklearn.utils._testing import 
assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import if_safe_multiprocessing_with_blas - -from sklearn.decomposition import SparsePCA, MiniBatchSparsePCA, PCA +from sklearn.decomposition import PCA, MiniBatchSparsePCA, SparsePCA from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + if_safe_multiprocessing_with_blas, +) def generate_toy_data(n_components, n_samples, image_size, random_state=None): diff --git a/sklearn/decomposition/tests/test_truncated_svd.py b/sklearn/decomposition/tests/test_truncated_svd.py index bd0bde6e08aa7..4edb7d4a11109 100644 --- a/sklearn/decomposition/tests/test_truncated_svd.py +++ b/sklearn/decomposition/tests/test_truncated_svd.py @@ -1,13 +1,12 @@ """Test truncated SVD transformer.""" import numpy as np -import scipy.sparse as sp - import pytest +import scipy.sparse as sp -from sklearn.decomposition import TruncatedSVD, PCA +from sklearn.decomposition import PCA, TruncatedSVD from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_less, assert_allclose +from sklearn.utils._testing import assert_allclose, assert_array_less SVD_SOLVERS = ["arpack", "randomized"] diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index d06489fa8fd63..a69d6195d124f 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -10,21 +10,24 @@ # License: BSD 3-Clause import warnings +from numbers import Real + import numpy as np from scipy import linalg from scipy.special import expit -from numbers import Real -from .base import BaseEstimator, TransformerMixin, ClassifierMixin -from .base import _ClassNamePrefixFeaturesOutMixin +from .base import ( + BaseEstimator, + ClassifierMixin, + TransformerMixin, + _ClassNamePrefixFeaturesOutMixin, +) +from .covariance import empirical_covariance, ledoit_wolf, shrunk_covariance from .linear_model._base import LinearClassifierMixin -from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance -from .utils.multiclass import unique_labels -from .utils.validation import check_is_fitted -from .utils.multiclass import check_classification_targets -from .utils.extmath import softmax from .preprocessing import StandardScaler - +from .utils.extmath import softmax +from .utils.multiclass import check_classification_targets, unique_labels +from .utils.validation import check_is_fitted __all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"] diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 7b31ee226664c..a47ed21dcc444 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -4,20 +4,22 @@ # License: BSD 3 clause import warnings + import numpy as np import scipy.sparse as sp -from .base import BaseEstimator, ClassifierMixin, RegressorMixin -from .base import MultiOutputMixin -from .utils import check_random_state -from .utils import deprecated -from .utils.validation import _num_samples -from .utils.validation import check_array -from .utils.validation import check_consistent_length -from .utils.validation import check_is_fitted, _check_sample_weight +from .base import BaseEstimator, ClassifierMixin, MultiOutputMixin, RegressorMixin +from .utils import check_random_state, deprecated +from .utils.multiclass import class_distribution from .utils.random import _random_choice_csc from .utils.stats import _weighted_percentile -from .utils.multiclass import class_distribution +from 
.utils.validation import ( + _check_sample_weight, + _num_samples, + check_array, + check_consistent_length, + check_is_fitted, +) class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator): diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py index e892d36a0ce46..f4a3756bdaf1d 100644 --- a/sklearn/ensemble/__init__.py +++ b/sklearn/ensemble/__init__.py @@ -2,27 +2,24 @@ The :mod:`sklearn.ensemble` module includes ensemble-based methods for classification, regression and anomaly detection. """ +from ._bagging import BaggingClassifier, BaggingRegressor from ._base import BaseEnsemble -from ._forest import RandomForestClassifier -from ._forest import RandomForestRegressor -from ._forest import RandomTreesEmbedding -from ._forest import ExtraTreesClassifier -from ._forest import ExtraTreesRegressor -from ._bagging import BaggingClassifier -from ._bagging import BaggingRegressor -from ._iforest import IsolationForest -from ._weight_boosting import AdaBoostClassifier -from ._weight_boosting import AdaBoostRegressor -from ._gb import GradientBoostingClassifier -from ._gb import GradientBoostingRegressor -from ._voting import VotingClassifier -from ._voting import VotingRegressor -from ._stacking import StackingClassifier -from ._stacking import StackingRegressor +from ._forest import ( + ExtraTreesClassifier, + ExtraTreesRegressor, + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, +) +from ._gb import GradientBoostingClassifier, GradientBoostingRegressor from ._hist_gradient_boosting.gradient_boosting import ( - HistGradientBoostingRegressor, HistGradientBoostingClassifier, + HistGradientBoostingRegressor, ) +from ._iforest import IsolationForest +from ._stacking import StackingClassifier, StackingRegressor +from ._voting import VotingClassifier, VotingRegressor +from ._weight_boosting import AdaBoostClassifier, AdaBoostRegressor __all__ = [ "BaseEnsemble", diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 9c8faa783c788..5052f940eb79a 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -6,25 +6,24 @@ import itertools import numbers -import numpy as np from abc import ABCMeta, abstractmethod -from warnings import warn from functools import partial +from warnings import warn + +import numpy as np from joblib import Parallel -from ._base import BaseEnsemble, _partition_estimators from ..base import ClassifierMixin, RegressorMixin -from ..metrics import r2_score, accuracy_score +from ..metrics import accuracy_score, r2_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import check_random_state, column_or_1d, deprecated -from ..utils import indices_to_mask +from ..utils import check_random_state, column_or_1d, deprecated, indices_to_mask +from ..utils.fixes import delayed from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets from ..utils.random import sample_without_replacement -from ..utils.validation import has_fit_parameter, check_is_fitted, _check_sample_weight -from ..utils.fixes import delayed - +from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter +from ._base import BaseEnsemble, _partition_estimators __all__ = ["BaggingClassifier", "BaggingRegressor"] diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py index 6e97bef0678a2..e89b82c185648 100644 --- a/sklearn/ensemble/_base.py +++ b/sklearn/ensemble/_base.py @@ -3,26 +3,22 @@ # Authors: Gilles 
Louppe # License: BSD 3 clause -from abc import ABCMeta, abstractmethod import numbers +from abc import ABCMeta, abstractmethod from typing import List import numpy as np from joblib import effective_n_jobs -from ..base import clone -from ..base import is_classifier, is_regressor -from ..base import BaseEstimator -from ..base import MetaEstimatorMixin +from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier, is_regressor from ..tree import ( - DecisionTreeRegressor, - ExtraTreeRegressor, BaseDecisionTree, DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeRegressor, ) -from ..utils import Bunch, _print_elapsed_time -from ..utils import check_random_state +from ..utils import Bunch, _print_elapsed_time, check_random_state from ..utils.metaestimators import _BaseComposition diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 919586001c58e..b8b2787e8bc0a 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -41,18 +41,24 @@ class calls the ``fit`` method of each sub-estimator on random samples import numbers -from warnings import catch_warnings, simplefilter, warn import threading - from abc import ABCMeta, abstractmethod +from warnings import catch_warnings, simplefilter, warn + import numpy as np -from scipy.sparse import issparse from scipy.sparse import hstack as sparse_hstack -from joblib import Parallel +from scipy.sparse import issparse -from ..base import is_classifier -from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin +from joblib import Parallel +from ..base import ( + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + TransformerMixin, + is_classifier, +) +from ..exceptions import DataConversionWarning from ..metrics import accuracy_score, r2_score from ..preprocessing import OneHotEncoder from ..tree import ( @@ -61,19 +67,17 @@ class calls the ``fit`` method of each sub-estimator on random samples ExtraTreeClassifier, ExtraTreeRegressor, ) -from ..tree._tree import DTYPE, DOUBLE +from ..tree._tree import DOUBLE, DTYPE from ..utils import check_random_state, compute_sample_weight, deprecated -from ..exceptions import DataConversionWarning -from ._base import BaseEnsemble, _partition_estimators from ..utils.fixes import delayed from ..utils.multiclass import check_classification_targets, type_of_target from ..utils.validation import ( - check_is_fitted, - _check_sample_weight, _check_feature_names_in, + _check_sample_weight, + _num_samples, + check_is_fitted, ) -from ..utils.validation import _num_samples - +from ._base import BaseEnsemble, _partition_estimators __all__ = [ "RandomForestClassifier", diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 9b776a7feab10..3ea2f7c450bcf 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -20,41 +20,31 @@ # Arnaud Joly, Jacob Schreiber # License: BSD 3 clause -from abc import ABCMeta -from abc import abstractmethod +import numbers import warnings +from abc import ABCMeta, abstractmethod +from time import time -from ._base import BaseEnsemble -from ..base import ClassifierMixin -from ..base import RegressorMixin -from ..base import BaseEstimator -from ..base import is_classifier -from ..utils import deprecated - -from ._gradient_boosting import predict_stages -from ._gradient_boosting import predict_stage -from ._gradient_boosting import _random_sample_mask - -import numbers import numpy as np +from scipy.sparse import csc_matrix, csr_matrix, issparse -from scipy.sparse import csc_matrix 
-from scipy.sparse import csr_matrix -from scipy.sparse import issparse - -from time import time +from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, is_classifier +from ..exceptions import NotFittedError from ..model_selection import train_test_split from ..tree import DecisionTreeRegressor -from ..tree._tree import DTYPE, DOUBLE -from . import _gb_losses - -from ..utils import check_random_state -from ..utils import check_array -from ..utils import check_scalar -from ..utils import column_or_1d -from ..utils.validation import check_is_fitted, _check_sample_weight +from ..tree._tree import DOUBLE, DTYPE +from ..utils import ( + check_array, + check_random_state, + check_scalar, + column_or_1d, + deprecated, +) from ..utils.multiclass import check_classification_targets -from ..exceptions import NotFittedError +from ..utils.validation import _check_sample_weight, check_is_fitted +from . import _gb_losses +from ._base import BaseEnsemble +from ._gradient_boosting import _random_sample_mask, predict_stage, predict_stages class VerboseReporter: diff --git a/sklearn/ensemble/_gb_losses.py b/sklearn/ensemble/_gb_losses.py index f6b5167d5128c..b89552bc706e8 100644 --- a/sklearn/ensemble/_gb_losses.py +++ b/sklearn/ensemble/_gb_losses.py @@ -2,16 +2,14 @@ decision trees. """ -from abc import ABCMeta -from abc import abstractmethod +from abc import ABCMeta, abstractmethod import numpy as np from scipy.special import expit, logsumexp +from ..dummy import DummyClassifier, DummyRegressor from ..tree._tree import TREE_LEAF from ..utils.stats import _weighted_percentile -from ..dummy import DummyClassifier -from ..dummy import DummyRegressor class LossFunction(metaclass=ABCMeta): diff --git a/sklearn/ensemble/_gradient_boosting.pyx b/sklearn/ensemble/_gradient_boosting.pyx index 624d3e165502a..6baee8581b307 100644 --- a/sklearn/ensemble/_gradient_boosting.pyx +++ b/sklearn/ensemble/_gradient_boosting.pyx @@ -3,22 +3,18 @@ # License: BSD 3 clause cimport cython - from libc.stdlib cimport free from libc.string cimport memset import numpy as np + cimport numpy as np + np.import_array() -from scipy.sparse import issparse -from scipy.sparse import csr_matrix +from scipy.sparse import csr_matrix, issparse -from ..tree._tree cimport Node -from ..tree._tree cimport Tree -from ..tree._tree cimport DTYPE_t -from ..tree._tree cimport SIZE_t -from ..tree._tree cimport INT32_t +from ..tree._tree cimport DTYPE_t, INT32_t, Node, SIZE_t, Tree from ..tree._utils cimport safe_realloc ctypedef np.int32_t int32 @@ -26,10 +22,11 @@ ctypedef np.float64_t float64 ctypedef np.uint8_t uint8 # no namespace lookup for numpy dtype and array creation -from numpy import zeros as np_zeros -from numpy import ones as np_ones + from numpy import float32 as np_float32 from numpy import float64 as np_float64 +from numpy import ones as np_ones +from numpy import zeros as np_zeros # constant to mark tree leafs diff --git a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx index 3f0276b589bd9..26085e4ac1013 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx @@ -3,11 +3,14 @@ cimport cython import numpy as np + from numpy.math cimport INFINITY + from cython.parallel import prange + from libc.math cimport isnan -from .common cimport X_DTYPE_C, X_BINNED_DTYPE_C +from .common cimport X_BINNED_DTYPE_C, X_DTYPE_C def _map_to_bins(const X_DTYPE_C [:, :] data, diff --git 
a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd index 4aea8276c4398..488b66d300631 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd +++ b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd @@ -1,7 +1,5 @@ -from .common cimport X_BINNED_DTYPE_C -from .common cimport BITSET_DTYPE_C -from .common cimport BITSET_INNER_DTYPE_C -from .common cimport X_DTYPE_C +from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, X_BINNED_DTYPE_C, X_DTYPE_C + cdef void init_bitset(BITSET_DTYPE_C bitset) nogil diff --git a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx index 0d3b630f3314f..249585aaf22f1 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx @@ -1,8 +1,4 @@ -from .common cimport BITSET_INNER_DTYPE_C -from .common cimport BITSET_DTYPE_C -from .common cimport X_DTYPE_C -from .common cimport X_BINNED_DTYPE_C - +from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, X_BINNED_DTYPE_C, X_DTYPE_C # A bitset is a data structure used to represent sets of integers in [0, n]. We # use them to represent sets of features indices (e.g. features that go to the diff --git a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx index bc27278ba9a1a..daf081bb926b4 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx @@ -1,10 +1,12 @@ # Author: Nicolas Hug cimport cython -from cython.parallel import prange + import numpy as np +from cython.parallel import prange from .common import Y_DTYPE + from .common cimport Y_DTYPE_C diff --git a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx index 461747b3b1323..bd3a9e8ed86a7 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx @@ -1,19 +1,21 @@ # Author: Nicolas Hug cimport cython + from cython.parallel import prange + from libc.math cimport isnan + import numpy as np + from numpy.math cimport INFINITY -from .common cimport X_DTYPE_C -from .common cimport Y_DTYPE_C +from .common cimport X_DTYPE_C, Y_DTYPE_C + from .common import Y_DTYPE -from .common cimport X_BINNED_DTYPE_C -from .common cimport BITSET_INNER_DTYPE_C -from .common cimport BITSET_DTYPE_C -from .common cimport node_struct + from ._bitset cimport in_bitset_2d_memoryview +from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, X_BINNED_DTYPE_C, node_struct def _predict_from_raw_data( # raw data = non-binned data diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index a553a307d262b..e8fb46eb06265 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -9,14 +9,14 @@ import numpy as np -from ...utils import check_random_state, check_array from ...base import BaseEstimator, TransformerMixin -from ...utils.validation import check_is_fitted -from ...utils.fixes import percentile +from ...utils import check_array, check_random_state from ...utils._openmp_helpers import _openmp_effective_n_threads +from ...utils.fixes import percentile +from ...utils.validation import check_is_fitted from ._binning import _map_to_bins -from .common import X_DTYPE, 
X_BINNED_DTYPE, ALMOST_INF, X_BITSET_INNER_DTYPE from ._bitset import set_bitset_memoryview +from .common import ALMOST_INF, X_BINNED_DTYPE, X_BITSET_INNER_DTYPE, X_DTYPE def _find_binning_thresholds(col_data, max_bins): diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index e36f1beb86dd8..b767111fdb92d 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1,41 +1,40 @@ """Fast Gradient Boosting decision trees for classification and regression.""" # Author: Nicolas Hug +import warnings from abc import ABC, abstractmethod from functools import partial -import warnings +from timeit import default_timer as time import numpy as np -from timeit import default_timer as time + from ..._loss.loss import ( _LOSSES, - BaseLoss, AbsoluteError, + BaseLoss, HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss, HalfSquaredError, PinballLoss, ) -from ...base import BaseEstimator, RegressorMixin, ClassifierMixin, is_classifier +from ...base import BaseEstimator, ClassifierMixin, RegressorMixin, is_classifier +from ...metrics import check_scoring +from ...model_selection import train_test_split +from ...preprocessing import LabelEncoder from ...utils import check_random_state, resample +from ...utils._openmp_helpers import _openmp_effective_n_threads +from ...utils.multiclass import check_classification_targets from ...utils.validation import ( - check_is_fitted, - check_consistent_length, _check_sample_weight, + check_consistent_length, + check_is_fitted, ) -from ...utils._openmp_helpers import _openmp_effective_n_threads -from ...utils.multiclass import check_classification_targets -from ...metrics import check_scoring -from ...model_selection import train_test_split -from ...preprocessing import LabelEncoder from ._gradient_boosting import _update_raw_predictions -from .common import Y_DTYPE, X_DTYPE, G_H_DTYPE - from .binning import _BinMapper +from .common import G_H_DTYPE, X_DTYPE, Y_DTYPE from .grower import TreeGrower - _LOSSES = _LOSSES.copy() # TODO(1.2): Remove "least_squares" and "least_absolute_deviation" # TODO(1.3): Remove "binary_crossentropy" and "categorical_crossentropy" diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 1733b5745f8a2..4d2550ee19242 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -6,22 +6,25 @@ """ # Author: Nicolas Hug -from heapq import heappush, heappop -import numpy as np -from timeit import default_timer as time import numbers +from heapq import heappop, heappush +from timeit import default_timer as time -from .splitting import Splitter +import numpy as np + +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +from ._bitset import set_raw_bitset_from_binned_bitset +from .common import ( + PREDICTOR_RECORD_DTYPE, + X_BITSET_INNER_DTYPE, + Y_DTYPE, + MonotonicConstraint, +) from .histogram import HistogramBuilder from .predictor import TreePredictor +from .splitting import Splitter from .utils import sum_parallel -from .common import PREDICTOR_RECORD_DTYPE -from .common import X_BITSET_INNER_DTYPE -from .common import Y_DTYPE -from .common import MonotonicConstraint -from ._bitset import set_raw_bitset_from_binned_bitset -from sklearn.utils._openmp_helpers import _openmp_effective_n_threads - EPS = np.finfo(Y_DTYPE).eps 
# to avoid zero division errors diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx index cd4b999dd0d26..83fda75c575b0 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx @@ -3,15 +3,13 @@ # Author: Nicolas Hug cimport cython -from cython.parallel import prange import numpy as np +from cython.parallel import prange from .common import HISTOGRAM_DTYPE -from .common cimport hist_struct -from .common cimport X_BINNED_DTYPE_C -from .common cimport G_H_DTYPE_C +from .common cimport G_H_DTYPE_C, X_BINNED_DTYPE_C, hist_struct # Notes: # - IN views are read-only, OUT views are write-only @@ -180,7 +178,7 @@ cdef class HistogramBuilder: unsigned char hessians_are_constant = \ self.hessians_are_constant unsigned int bin_idx = 0 - + for bin_idx in range(self.n_bins): histograms[feature_idx, bin_idx].sum_gradients = 0. histograms[feature_idx, bin_idx].sum_hessians = 0. diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index 746fa34753121..600e55e43467f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -5,10 +5,12 @@ import numpy as np +from ._predictor import ( + _compute_partial_dependence, + _predict_from_binned_data, + _predict_from_raw_data, +) from .common import Y_DTYPE -from ._predictor import _predict_from_raw_data -from ._predictor import _predict_from_binned_data -from ._predictor import _compute_partial_dependence class TreePredictor: diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx index 06f6c9344d205..4deedc5b02afb 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx @@ -8,22 +8,20 @@ # Author: Nicolas Hug cimport cython -from cython.parallel import prange + import numpy as np -from libc.stdlib cimport malloc, free, qsort +from cython.parallel import prange + +from libc.stdlib cimport free, malloc, qsort from libc.string cimport memcpy from numpy.math cimport INFINITY -from .common cimport X_BINNED_DTYPE_C -from .common cimport Y_DTYPE_C -from .common cimport hist_struct +from .common cimport X_BINNED_DTYPE_C, Y_DTYPE_C, hist_struct + from .common import HISTOGRAM_DTYPE -from .common cimport BITSET_INNER_DTYPE_C -from .common cimport BITSET_DTYPE_C -from .common cimport MonotonicConstraint -from ._bitset cimport init_bitset -from ._bitset cimport set_bitset -from ._bitset cimport in_bitset + +from ._bitset cimport in_bitset, init_bitset, set_bitset +from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, MonotonicConstraint cdef struct split_info_struct: diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py index 4581173fefe67..6c318ce22a2be 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py @@ -1,15 +1,17 @@ import numpy as np -from numpy.testing import assert_array_equal, assert_allclose import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn.ensemble._hist_gradient_boosting.binning import ( _BinMapper, _find_binning_thresholds, _map_to_bins, ) -from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE -from 
sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF +from sklearn.ensemble._hist_gradient_boosting.common import ( + ALMOST_INF, + X_BINNED_DTYPE, + X_DTYPE, +) from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py index e058781cefcef..c02d66b666f80 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py @@ -1,10 +1,10 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from sklearn.ensemble._hist_gradient_boosting._bitset import ( - set_bitset_memoryview, in_bitset_memoryview, + set_bitset_memoryview, set_raw_bitset_from_binned_bitset, ) from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py index f5c373ed84558..ca82bf367f09a 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py @@ -1,13 +1,15 @@ -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score -from sklearn.datasets import make_classification, make_regression import numpy as np import pytest -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split @pytest.mark.parametrize("seed", range(5)) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index efa1ac1a4d762..1fc9b2f867da9 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -3,33 +3,32 @@ import numpy as np import pytest from numpy.testing import assert_allclose, assert_array_equal + from sklearn._loss.loss import ( AbsoluteError, HalfBinomialLoss, HalfSquaredError, PinballLoss, ) -from sklearn.datasets import make_classification, make_regression -from sklearn.datasets import make_low_rank_matrix -from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder -from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.base import clone, BaseEstimator, TransformerMixin -from sklearn.base import is_regressor -from sklearn.pipeline import make_pipeline -from sklearn.metrics import mean_poisson_deviance -from sklearn.dummy import DummyRegressor -from sklearn.exceptions import NotFittedError +from sklearn.base import BaseEstimator, TransformerMixin, clone, is_regressor from sklearn.compose import make_column_transformer - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import 
HistGradientBoostingClassifier -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.datasets import make_classification, make_low_rank_matrix, make_regression +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.exceptions import NotFittedError +from sklearn.metrics import mean_poisson_deviance +from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder from sklearn.utils import shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads - n_threads = _openmp_effective_n_threads() X_classification, y_classification = make_classification(random_state=0) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py index 2f1998c868f41..4453afca157bc 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py @@ -1,17 +1,18 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal from pytest import approx -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose -from sklearn.preprocessing import OneHotEncoder -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + X_BINNED_DTYPE, + X_BITSET_INNER_DTYPE, + X_DTYPE, + Y_DTYPE, +) +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.preprocessing import OneHotEncoder from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py index 1d5963d20739b..99f74b0f542ee 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py @@ -1,20 +1,20 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal -from numpy.testing import assert_allclose -from numpy.testing import assert_array_equal - +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + HISTOGRAM_DTYPE, + X_BINNED_DTYPE, +) from sklearn.ensemble._hist_gradient_boosting.histogram import ( - _build_histogram_naive, _build_histogram, + _build_histogram_naive, _build_histogram_no_hessian, - _build_histogram_root_no_hessian, _build_histogram_root, + _build_histogram_root_no_hessian, _subtract_histograms, ) -from sklearn.ensemble._hist_gradient_boosting.common import 
HISTOGRAM_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE @pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram]) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py index 4ab65c55a8620..afceedee624e0 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py @@ -1,17 +1,21 @@ import numpy as np import pytest +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + X_BINNED_DTYPE, + MonotonicConstraint, +) from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint +from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.ensemble._hist_gradient_boosting.splitting import ( Splitter, compute_node_value, ) -from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py index 856ab180459d2..3c3c9ae81bac2 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py @@ -1,25 +1,25 @@ import numpy as np -from numpy.testing import assert_allclose -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split -from sklearn.metrics import r2_score import pytest +from numpy.testing import assert_allclose +from sklearn.datasets import make_regression +from sklearn.ensemble._hist_gradient_boosting._bitset import ( + set_bitset_memoryview, + set_raw_bitset_from_binned_bitset, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower -from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor from sklearn.ensemble._hist_gradient_boosting.common import ( + ALMOST_INF, G_H_DTYPE, PREDICTOR_RECORD_DTYPE, - ALMOST_INF, X_BINNED_DTYPE, X_BITSET_INNER_DTYPE, X_DTYPE, ) -from sklearn.ensemble._hist_gradient_boosting._bitset import ( - set_bitset_memoryview, - set_raw_bitset_from_binned_bitset, -) +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py index 0d19bdc6df72b..24c18e468e457 100644 --- 
a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py @@ -2,17 +2,19 @@ import pytest from numpy.testing import assert_array_equal -from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + HISTOGRAM_DTYPE, + X_BINNED_DTYPE, + MonotonicConstraint, +) +from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.ensemble._hist_gradient_boosting.splitting import ( Splitter, compute_node_value, ) -from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder -from sklearn.utils._testing import skip_if_32bit from sklearn.utils._openmp_helpers import _openmp_effective_n_threads +from sklearn.utils._testing import skip_if_32bit n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index f8d7533ec38bc..03a2720b36127 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -1,17 +1,15 @@ import numpy as np -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose - import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn.base import clone from sklearn.datasets import make_classification, make_regression - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.metrics import check_scoring - X_classification, y_classification = make_classification(random_state=0) X_regression, y_regression = make_regression(random_state=0) diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx index d2123ecc61510..ce8fb51653ed7 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx @@ -5,8 +5,8 @@ from cython.parallel import prange from ...base import is_classifier from .binning import _BinMapper -from .common cimport G_H_DTYPE_C -from .common cimport Y_DTYPE_C + +from .common cimport G_H_DTYPE_C, Y_DTYPE_C def get_equivalent_estimator(estimator, lib='lightgbm', n_classes=None): @@ -115,24 +115,21 @@ def get_equivalent_estimator(estimator, lib='lightgbm', n_classes=None): } if lib == 'lightgbm': - from lightgbm import LGBMRegressor - from lightgbm import LGBMClassifier + from lightgbm import LGBMClassifier, LGBMRegressor if is_classifier(estimator): return LGBMClassifier(**lightgbm_params) else: return LGBMRegressor(**lightgbm_params) elif lib == 'xgboost': - from xgboost import XGBRegressor - from xgboost import XGBClassifier + from xgboost import XGBClassifier, XGBRegressor if is_classifier(estimator): return XGBClassifier(**xgboost_params) else: return XGBRegressor(**xgboost_params) else: - from catboost import CatBoostRegressor - from catboost import CatBoostClassifier + from catboost import CatBoostClassifier, CatBoostRegressor if is_classifier(estimator): return 
CatBoostClassifier(**catboost_params) else: diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py index 5d50fad4780a7..c50200c119fd5 100644 --- a/sklearn/ensemble/_iforest.py +++ b/sklearn/ensemble/_iforest.py @@ -3,21 +3,16 @@ # License: BSD 3 clause import numbers +from warnings import warn + import numpy as np from scipy.sparse import issparse -from warnings import warn +from ..base import OutlierMixin from ..tree import ExtraTreeRegressor from ..tree._tree import DTYPE as tree_dtype -from ..utils import ( - check_random_state, - check_array, - gen_batches, - get_chunk_n_rows, -) -from ..utils.validation import check_is_fitted, _num_samples -from ..base import OutlierMixin - +from ..utils import check_array, check_random_state, gen_batches, get_chunk_n_rows +from ..utils.validation import _num_samples, check_is_fitted from ._bagging import BaseBagging __all__ = ["IsolationForest"] diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 01556ec72e641..9f8754e410653 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -7,34 +7,34 @@ from copy import deepcopy import numpy as np -from joblib import Parallel import scipy.sparse as sparse -from ..base import clone -from ..base import ClassifierMixin, RegressorMixin, TransformerMixin -from ..base import is_classifier, is_regressor -from ..exceptions import NotFittedError -from ..utils._estimator_html_repr import _VisualBlock - -from ._base import _fit_single_estimator -from ._base import _BaseHeterogeneousEnsemble - -from ..linear_model import LogisticRegression -from ..linear_model import RidgeCV - -from ..model_selection import cross_val_predict -from ..model_selection import check_cv +from joblib import Parallel +from ..base import ( + ClassifierMixin, + RegressorMixin, + TransformerMixin, + clone, + is_classifier, + is_regressor, +) +from ..exceptions import NotFittedError +from ..linear_model import LogisticRegression, RidgeCV +from ..model_selection import check_cv, cross_val_predict from ..preprocessing import LabelEncoder - from ..utils import Bunch +from ..utils._estimator_html_repr import _VisualBlock +from ..utils.fixes import delayed from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets -from ..utils.validation import check_is_fitted -from ..utils.validation import check_scalar -from ..utils.validation import column_or_1d -from ..utils.fixes import delayed -from ..utils.validation import _check_feature_names_in +from ..utils.validation import ( + _check_feature_names_in, + check_is_fitted, + check_scalar, + column_or_1d, +) +from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator def _estimator_has(attr): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 89f6dcd9ba217..b8102e645e8ce 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -13,30 +13,23 @@ # # License: BSD 3 clause +import numbers from abc import abstractmethod -import numbers import numpy as np from joblib import Parallel -from ..base import ClassifierMixin -from ..base import RegressorMixin -from ..base import TransformerMixin -from ..base import clone -from ._base import _fit_single_estimator -from ._base import _BaseHeterogeneousEnsemble -from ..preprocessing import LabelEncoder -from ..utils import Bunch -from ..utils import check_scalar -from ..utils.metaestimators import available_if -from ..utils.validation import check_is_fitted -from ..utils.validation import 
_check_feature_names_in -from ..utils.multiclass import check_classification_targets -from ..utils.validation import column_or_1d +from ..base import ClassifierMixin, RegressorMixin, TransformerMixin, clone from ..exceptions import NotFittedError +from ..preprocessing import LabelEncoder +from ..utils import Bunch, check_scalar from ..utils._estimator_html_repr import _VisualBlock from ..utils.fixes import delayed +from ..utils.metaestimators import available_if +from ..utils.multiclass import check_classification_targets +from ..utils.validation import _check_feature_names_in, check_is_fitted, column_or_1d +from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 902d960f5e06c..2fffbda19885e 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -23,28 +23,25 @@ # # License: BSD 3 clause -from abc import ABCMeta, abstractmethod - import numbers -import numpy as np - import warnings +from abc import ABCMeta, abstractmethod +import numpy as np from scipy.special import xlogy -from ._base import BaseEnsemble from ..base import ClassifierMixin, RegressorMixin, is_classifier, is_regressor - -from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import check_random_state, _safe_indexing -from ..utils import check_scalar -from ..utils.extmath import softmax -from ..utils.extmath import stable_cumsum from ..metrics import accuracy_score, r2_score -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..utils.validation import has_fit_parameter -from ..utils.validation import _num_samples +from ..tree import DecisionTreeClassifier, DecisionTreeRegressor +from ..utils import _safe_indexing, check_random_state, check_scalar +from ..utils.extmath import softmax, stable_cumsum +from ..utils.validation import ( + _check_sample_weight, + _num_samples, + check_is_fitted, + has_fit_parameter, +) +from ._base import BaseEnsemble __all__ = [ "AdaBoostClassifier", diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 1f78e61ab24fa..22d97cea99a2a 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -4,33 +4,28 @@ # Author: Gilles Louppe # License: BSD 3 clause -from itertools import product +from itertools import cycle, product import numpy as np -import joblib import pytest +from scipy.sparse import csc_matrix, csr_matrix +import joblib from sklearn.base import BaseEstimator - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal +from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.model_selection import GridSearchCV, ParameterGrid from sklearn.ensemble import BaggingClassifier, BaggingRegressor -from sklearn.linear_model import Perceptron, LogisticRegression +from sklearn.feature_selection import SelectKBest +from sklearn.linear_model import LogisticRegression, Perceptron +from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.svm import SVC, SVR -from sklearn.random_projection import 
SparseRandomProjection from sklearn.pipeline import make_pipeline -from sklearn.feature_selection import SelectKBest -from sklearn.model_selection import train_test_split -from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 -from sklearn.utils import check_random_state from sklearn.preprocessing import FunctionTransformer, scale -from itertools import cycle - -from scipy.sparse import csc_matrix, csr_matrix +from sklearn.random_projection import SparseRandomProjection +from sklearn.svm import SVC, SVR +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal rng = check_random_state(0) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 46b638c179859..3f5c488d8eeac 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -5,17 +5,18 @@ # Authors: Gilles Louppe # License: BSD 3 clause +from collections import OrderedDict + import numpy as np import pytest from sklearn.datasets import load_iris +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.ensemble import BaggingClassifier from sklearn.ensemble._base import _set_random_states +from sklearn.feature_selection import SelectFromModel from sklearn.linear_model import Perceptron -from collections import OrderedDict -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.pipeline import Pipeline -from sklearn.feature_selection import SelectFromModel def test_base(): diff --git a/sklearn/ensemble/tests/test_common.py b/sklearn/ensemble/tests/test_common.py index 6c438571eaf39..051408b5ba2f0 100644 --- a/sklearn/ensemble/tests/test_common.py +++ b/sklearn/ensemble/tests/test_common.py @@ -1,21 +1,25 @@ import numpy as np import pytest -from sklearn.base import clone -from sklearn.base import ClassifierMixin -from sklearn.base import is_classifier - -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression -from sklearn.datasets import load_iris, load_diabetes +from sklearn.base import ClassifierMixin, clone, is_classifier +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + StackingClassifier, + StackingRegressor, + VotingClassifier, + VotingRegressor, +) from sklearn.impute import SimpleImputer -from sklearn.linear_model import LogisticRegression, LinearRegression -from sklearn.svm import LinearSVC, LinearSVR, SVC, SVR +from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.pipeline import make_pipeline -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor - -from sklearn.ensemble import StackingClassifier, StackingRegressor -from sklearn.ensemble import VotingClassifier, VotingRegressor +from sklearn.svm import SVC, SVR, LinearSVC, LinearSVR X, y = load_iris(return_X_y=True) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index c235ec078f2e6..dbd65f3554898 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -8,53 +8,45 @@ # Arnaud Joly # License: BSD 3 clause -import pickle +import itertools import math +import pickle from collections import defaultdict -import itertools -from itertools import combinations -from itertools import product 
-from typing import Dict, Any +from itertools import combinations, product +from typing import Any, Dict import numpy as np -from scipy.sparse import csr_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import coo_matrix -from scipy.special import comb - import pytest - -import joblib from numpy.testing import assert_allclose +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix +from scipy.special import comb -from sklearn.dummy import DummyRegressor -from sklearn.metrics import mean_poisson_deviance -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import _convert_container -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import skip_if_no_parallel - -from sklearn.exceptions import NotFittedError - +import joblib from sklearn import datasets -from sklearn.decomposition import TruncatedSVD from sklearn.datasets import make_classification -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import ExtraTreesRegressor -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import RandomTreesEmbedding -from sklearn.model_selection import train_test_split -from sklearn.model_selection import GridSearchCV +from sklearn.decomposition import TruncatedSVD +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + ExtraTreesClassifier, + ExtraTreesRegressor, + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, +) +from sklearn.exceptions import NotFittedError +from sklearn.metrics import mean_poisson_deviance, mean_squared_error +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.svm import LinearSVC -from sklearn.utils.validation import check_random_state - -from sklearn.metrics import mean_squared_error - from sklearn.tree._classes import SPARSE_SPLITTERS - +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, + skip_if_no_parallel, +) +from sklearn.utils.validation import check_random_state # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 5a28bed077036..0adff603dc624 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -2,38 +2,33 @@ Testing for the gradient boosting module (sklearn.ensemble.gradient_boosting). 
""" import warnings + import numpy as np +import pytest from numpy.testing import assert_allclose - -from scipy.sparse import csr_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import coo_matrix +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix from scipy.special import expit -import pytest - from sklearn import datasets from sklearn.base import clone from sklearn.datasets import make_classification, make_regression -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.ensemble import GradientBoostingRegressor +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor from sklearn.ensemble._gradient_boosting import predict_stages -from sklearn.preprocessing import OneHotEncoder, scale -from sklearn.svm import LinearSVC +from sklearn.exceptions import DataConversionWarning, NotFittedError +from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder, scale +from sklearn.svm import LinearSVC, NuSVR from sklearn.utils import check_random_state, tosequence from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import skip_if_32bit -from sklearn.exceptions import DataConversionWarning -from sklearn.exceptions import NotFittedError -from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.pipeline import make_pipeline -from sklearn.linear_model import LinearRegression -from sklearn.svm import NuSVR - +from sklearn.utils._testing import ( + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) GRADIENT_BOOSTING_ESTIMATORS = [GradientBoostingClassifier, GradientBoostingRegressor] @@ -774,9 +769,8 @@ def test_oob_multilcass_iris(): def test_verbose_output(): # Check verbose=1 does not cause error. - from io import StringIO - import sys + from io import StringIO old_stdout = sys.stdout sys.stdout = StringIO() @@ -806,8 +800,8 @@ def test_verbose_output(): def test_more_verbose_output(): # Check verbose=2 does not cause error. - from io import StringIO import sys + from io import StringIO old_stdout = sys.stdout sys.stdout = StringIO() diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index 820b3b5697442..5b8884da05d0b 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -2,22 +2,25 @@ Testing for the gradient boosting loss functions and initial estimators. 
""" from itertools import product + import numpy as np -from numpy.testing import assert_allclose import pytest +from numpy.testing import assert_allclose from pytest import approx -from sklearn.utils import check_random_state +from sklearn.ensemble._gb_losses import ( + LOSS_FUNCTIONS, + BinomialDeviance, + ExponentialLoss, + HuberLossFunction, + LeastAbsoluteError, + LeastSquaresError, + MultinomialDeviance, + QuantileLossFunction, + RegressionLossFunction, +) from sklearn.metrics import mean_pinball_loss -from sklearn.ensemble._gb_losses import RegressionLossFunction -from sklearn.ensemble._gb_losses import LeastSquaresError -from sklearn.ensemble._gb_losses import LeastAbsoluteError -from sklearn.ensemble._gb_losses import HuberLossFunction -from sklearn.ensemble._gb_losses import QuantileLossFunction -from sklearn.ensemble._gb_losses import BinomialDeviance -from sklearn.ensemble._gb_losses import MultinomialDeviance -from sklearn.ensemble._gb_losses import ExponentialLoss -from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS +from sklearn.utils import check_random_state def test_binomial_deviance(): diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 76464ac518af1..c8ff6d82546a1 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -6,26 +6,25 @@ # Alexandre Gramfort # License: BSD 3 clause -import pytest import warnings +from unittest.mock import Mock, patch import numpy as np +import pytest +from scipy.sparse import csc_matrix, csr_matrix -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import assert_allclose - -from sklearn.model_selection import ParameterGrid +from sklearn.datasets import load_diabetes, load_iris, make_classification from sklearn.ensemble import IsolationForest from sklearn.ensemble._iforest import _average_path_length -from sklearn.model_selection import train_test_split -from sklearn.datasets import load_diabetes, load_iris, make_classification -from sklearn.utils import check_random_state from sklearn.metrics import roc_auc_score - -from scipy.sparse import csc_matrix, csr_matrix -from unittest.mock import Mock, patch +from sklearn.model_selection import ParameterGrid, train_test_split +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) rng = check_random_state(0) diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index 3fcc6f5dbefe8..8f9d34d188c8f 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -3,50 +3,39 @@ # Authors: Guillaume Lemaitre # License: BSD 3 clause -import pytest +from unittest.mock import Mock + import numpy as np -from numpy.testing import assert_array_equal +import pytest import scipy.sparse as sparse +from numpy.testing import assert_array_equal -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.base import RegressorMixin -from sklearn.base import clone - -from sklearn.exceptions import ConvergenceWarning - -from sklearn.datasets import load_iris -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_breast_cancer -from sklearn.datasets import make_regression -from sklearn.datasets import make_classification - -from 
sklearn.dummy import DummyClassifier -from sklearn.dummy import DummyRegressor -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import LinearRegression -from sklearn.svm import LinearSVC -from sklearn.svm import LinearSVR -from sklearn.svm import SVC -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor +from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone +from sklearn.datasets import ( + load_breast_cancer, + load_diabetes, + load_iris, + make_classification, + make_regression, +) +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + StackingClassifier, + StackingRegressor, +) +from sklearn.exceptions import ConvergenceWarning, NotFittedError +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import KFold, StratifiedKFold, train_test_split from sklearn.preprocessing import scale - -from sklearn.ensemble import StackingClassifier -from sklearn.ensemble import StackingRegressor - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import KFold - +from sklearn.svm import SVC, LinearSVC, LinearSVR from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import ignore_warnings - -from sklearn.exceptions import NotFittedError - -from unittest.mock import Mock +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + ignore_warnings, +) diabetes = load_diabetes() X_diabetes, y_diabetes = diabetes.data, diabetes.target diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index b7d7533da09a1..ee3ee8cb75f0e 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -1,30 +1,33 @@ """Testing for the VotingClassifier and VotingRegressor""" -import pytest import re + import numpy as np +import pytest -from sklearn.utils._testing import assert_almost_equal, assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.exceptions import NotFittedError -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import GaussianNB -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import VotingClassifier, VotingRegressor -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree import DecisionTreeRegressor -from sklearn.model_selection import GridSearchCV from sklearn import datasets -from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.datasets import make_multilabel_classification -from sklearn.svm import SVC +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + VotingClassifier, + VotingRegressor, +) +from sklearn.exceptions import NotFittedError +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.multiclass import OneVsRestClassifier +from sklearn.naive_bayes 
import GaussianNB from sklearn.neighbors import KNeighborsClassifier -from sklearn.base import BaseEstimator, ClassifierMixin, clone -from sklearn.dummy import DummyRegressor - +from sklearn.svm import SVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) # Load datasets iris = datasets.load_iris() diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 0348641d39453..6384f2de46bac 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -1,33 +1,27 @@ """Testing for the boost module (sklearn.ensemble.boost).""" -import numpy as np -import pytest import re -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import coo_matrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix - -from sklearn.utils._testing import assert_array_equal, assert_array_less -from sklearn.utils._testing import assert_array_almost_equal +import numpy as np +import pytest +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, dok_matrix, lil_matrix -from sklearn.base import BaseEstimator -from sklearn.base import clone +from sklearn import datasets +from sklearn.base import BaseEstimator, clone from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.linear_model import LinearRegression -from sklearn.model_selection import train_test_split -from sklearn.model_selection import GridSearchCV -from sklearn.ensemble import AdaBoostClassifier -from sklearn.ensemble import AdaBoostRegressor +from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor from sklearn.ensemble._weight_boosting import _samme_proba +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.svm import SVC, SVR from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import shuffle from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn import datasets - +from sklearn.utils._testing import ( + assert_array_almost_equal, + assert_array_equal, + assert_array_less, +) # Common random state rng = np.random.RandomState(0) diff --git a/sklearn/experimental/enable_halving_search_cv.py b/sklearn/experimental/enable_halving_search_cv.py index f6937b0d14c01..dd399ef35b6f7 100644 --- a/sklearn/experimental/enable_halving_search_cv.py +++ b/sklearn/experimental/enable_halving_search_cv.py @@ -19,13 +19,12 @@ flake8 to ignore the import, which appears as unused. """ +from .. import model_selection from ..model_selection._search_successive_halving import ( - HalvingRandomSearchCV, HalvingGridSearchCV, + HalvingRandomSearchCV, ) -from .. import model_selection - # use settattr to avoid mypy errors when monkeypatching setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV) setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV) diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index f0416ac013e96..d287400c7999f 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -12,7 +12,6 @@ import warnings - warnings.warn( "Since version 1.0, " "it is not needed to import enable_hist_gradient_boosting anymore. 
" diff --git a/sklearn/experimental/enable_iterative_imputer.py b/sklearn/experimental/enable_iterative_imputer.py index 9ef9f6a0dbdf0..0b906961ca184 100644 --- a/sklearn/experimental/enable_iterative_imputer.py +++ b/sklearn/experimental/enable_iterative_imputer.py @@ -12,8 +12,8 @@ >>> from sklearn.impute import IterativeImputer """ -from ..impute._iterative import IterativeImputer from .. import impute +from ..impute._iterative import IterativeImputer # use settattr to avoid mypy errors when monkeypatching setattr(impute, "IterativeImputer", IterativeImputer) diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py index 7c9d51d0702ff..2d65e244b3e21 100644 --- a/sklearn/externals/_arff.py +++ b/sklearn/externals/_arff.py @@ -24,7 +24,7 @@ # SOFTWARE. # ============================================================================= -''' +""" The liac-arff module implements functions to read and write ARFF files in Python. It was created in the Connectionist Artificial Intelligence Laboratory (LIAC), which takes place at the Federal University of Rio Grande do Sul @@ -140,33 +140,34 @@ - Fully compatible with Python 2.7+, Python 3.5+, pypy and pypy3; - Under `MIT License `_ -''' -__author__ = 'Renato de Pontes Pereira, Matthias Feurer, Joel Nothman' -__author_email__ = ('renato.ppontes@gmail.com, ' - 'feurerm@informatik.uni-freiburg.de, ' - 'joel.nothman@gmail.com') -__version__ = '2.4.0' +""" +__author__ = "Renato de Pontes Pereira, Matthias Feurer, Joel Nothman" +__author_email__ = ( + "renato.ppontes@gmail.com, " + "feurerm@informatik.uni-freiburg.de, " + "joel.nothman@gmail.com" +) +__version__ = "2.4.0" -import re import csv -from typing import TYPE_CHECKING -from typing import Optional, List, Dict, Any, Iterator, Union, Tuple +import re +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union # CONSTANTS =================================================================== -_SIMPLE_TYPES = ['NUMERIC', 'REAL', 'INTEGER', 'STRING'] +_SIMPLE_TYPES = ["NUMERIC", "REAL", "INTEGER", "STRING"] -_TK_DESCRIPTION = '%' -_TK_COMMENT = '%' -_TK_RELATION = '@RELATION' -_TK_ATTRIBUTE = '@ATTRIBUTE' -_TK_DATA = '@DATA' +_TK_DESCRIPTION = "%" +_TK_COMMENT = "%" +_TK_RELATION = "@RELATION" +_TK_ATTRIBUTE = "@ATTRIBUTE" +_TK_DATA = "@DATA" -_RE_RELATION = re.compile(r'^([^\{\}%,\s]*|\".*\"|\'.*\')$', re.UNICODE) -_RE_ATTRIBUTE = re.compile(r'^(\".*\"|\'.*\'|[^\{\}%,\s]*)\s+(.+)$', re.UNICODE) +_RE_RELATION = re.compile(r"^([^\{\}%,\s]*|\".*\"|\'.*\')$", re.UNICODE) +_RE_ATTRIBUTE = re.compile(r"^(\".*\"|\'.*\'|[^\{\}%,\s]*)\s+(.+)$", re.UNICODE) _RE_QUOTE_CHARS = re.compile(r'["\'\\\s%,\000-\031]', re.UNICODE) _RE_ESCAPE_CHARS = re.compile(r'(?=["\'\\%])|[\n\r\t\000-\031]') -_RE_SPARSE_LINE = re.compile(r'^\s*\{.*\}\s*$', re.UNICODE) -_RE_NONTRIVIAL_DATA = re.compile('["\'{}\\s]', re.UNICODE) +_RE_SPARSE_LINE = re.compile(r"^\s*\{.*\}\s*$", re.UNICODE) +_RE_NONTRIVIAL_DATA = re.compile("[\"'{}\\s]", re.UNICODE) ArffDenseDataType = Iterator[List] ArffSparseDataType = Tuple[List, ...] 
@@ -187,7 +188,7 @@ class ArffContainerType(TypedDict): def _build_re_values(): - quoted_re = r''' + quoted_re = r""" " # open quote followed by zero or more of: (?: (?<!\\) [...] if isinstance(values, dict): if values and max(values) >= len(conversors): raise BadDataFormat(row) # XXX: int 0 is used for implicit values, not '0' - values = [values[i] if i in values else 0 for i in - range(len(conversors))] + values = [ + values[i] if i in values else 0 for i in range(len(conversors)) + ] else: if len(values) != len(conversors): raise BadDataFormat(row) @@ -476,16 +498,17 @@ def decode_rows(self, stream, conversors): @staticmethod def _decode_values(values, conversors): try: - values = [None if value is None else conversor(value) - for conversor, value - in zip(conversors, values)] + values = [ + None if value is None else conversor(value) + for conversor, value in zip(conversors, values) + ] except ValueError as exc: - if 'float: ' in str(exc): + if "float: " in str(exc): raise BadNumericalValue() return values def encode_data(self, data, attributes): - '''(INTERNAL) Encodes a line of data. + """(INTERNAL) Encodes a line of data. Data instances follow the csv format, i.e, attribute values are delimited by commas. After converted from csv. @@ -493,30 +516,31 @@ def encode_data(self, data, attributes): :param data: a list of values. :param attributes: a list of attributes. Used to check if data is valid. :return: a string with the encoded data line. - ''' + """ current_row = 0 for inst in data: if len(inst) != len(attributes): raise BadObject( - 'Instance %d has %d attributes, expected %d' % - (current_row, len(inst), len(attributes)) + "Instance %d has %d attributes, expected %d" + % (current_row, len(inst), len(attributes)) ) new_data = [] for value in inst: - if value is None or value == '' or value != value: - s = '?' + if value is None or value == "" or value != value: + s = "?" else: s = encode_string(str(value)) new_data.append(s) current_row += 1 - yield ','.join(new_data) + yield ",".join(new_data) class _DataListMixin: """Mixin to return a list from decode_rows instead of a generator""" + def decode_rows(self, stream, conversors): return list(super().decode_rows(stream, conversors)) @@ -536,10 +560,12 @@ def decode_rows(self, stream, conversors): continue row_cols, values = zip(*sorted(values.items())) try: - values = [value if value is None else conversors[key](value) - for key, value in zip(row_cols, values)] + values = [ + value if value is None else conversors[key](value) + for key, value in zip(row_cols, values) + ] except ValueError as exc: - if 'float: ' in str(exc): + if "float: " in str(exc): raise BadNumericalValue() raise except IndexError: @@ -563,30 +589,30 @@ def encode_data(self, data, attributes): # Check if the rows are sorted if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)): - raise ValueError("liac-arff can only output COO matrices with " - "sorted rows.") + raise ValueError("liac-arff can only output COO matrices with sorted rows.") for v, col, row in zip(data, col, row): if row > current_row: # Add empty rows if necessary while current_row < row: - yield " ".join(["{", ','.join(new_data), "}"]) + yield " ".join(["{", ",".join(new_data), "}"]) new_data = [] current_row += 1 if col >= num_attributes: raise BadObject( - 'Instance %d has at least %d attributes, expected %d' % - (current_row, col + 1, num_attributes) + "Instance %d has at least %d attributes, expected %d" + % (current_row, col + 1, num_attributes) ) - if v is None or v == '' or v != v: - s = '?' + if v is None or v == "" or v != v: + s = "?"
else: s = encode_string(str(v)) new_data.append("%d %s" % (col, s)) - yield " ".join(["{", ','.join(new_data), "}"]) + yield " ".join(["{", ",".join(new_data), "}"]) + class LODGeneratorData: def decode_rows(self, stream, conversors): @@ -596,10 +622,12 @@ def decode_rows(self, stream, conversors): if not isinstance(values, dict): raise BadLayout() try: - yield {key: None if value is None else conversors[key](value) - for key, value in values.items()} + yield { + key: None if value is None else conversors[key](value) + for key, value in values.items() + } except ValueError as exc: - if 'float: ' in str(exc): + if "float: " in str(exc): raise BadNumericalValue() raise except IndexError: @@ -615,20 +643,21 @@ def encode_data(self, data, attributes): if len(row) > 0 and max(row) >= num_attributes: raise BadObject( - 'Instance %d has %d attributes, expected %d' % - (current_row, max(row) + 1, num_attributes) + "Instance %d has %d attributes, expected %d" + % (current_row, max(row) + 1, num_attributes) ) for col in sorted(row): v = row[col] - if v is None or v == '' or v != v: - s = '?' + if v is None or v == "" or v != v: + s = "?" else: s = encode_string(str(v)) new_data.append("%d %s" % (col, s)) current_row += 1 - yield " ".join(["{", ','.join(new_data), "}"]) + yield " ".join(["{", ",".join(new_data), "}"]) + class LODData(_DataListMixin, LODGeneratorData): pass @@ -648,31 +677,33 @@ def _get_data_object_for_decoding(matrix_type): else: raise ValueError("Matrix type %s not supported." % str(matrix_type)) + def _get_data_object_for_encoding(matrix): # Probably a scipy.sparse - if hasattr(matrix, 'format'): - if matrix.format == 'coo': + if hasattr(matrix, "format"): + if matrix.format == "coo": return COOData() else: - raise ValueError('Cannot guess matrix format!') + raise ValueError("Cannot guess matrix format!") elif isinstance(matrix[0], dict): return LODData() else: return Data() + # ============================================================================= # ADVANCED INTERFACE ========================================================== class ArffDecoder: - '''An ARFF decoder.''' + """An ARFF decoder.""" def __init__(self): - '''Constructor.''' + """Constructor.""" self._conversors = [] self._current_line = 0 def _decode_comment(self, s): - '''(INTERNAL) Decodes a comment line. + """(INTERNAL) Decodes a comment line. Comments are single line strings starting, obligatorily, with the ``%`` character, and can have any symbol, including whitespaces or special @@ -683,12 +714,12 @@ def _decode_comment(self, s): :param s: a normalized string. :return: a string with the decoded comment. - ''' - res = re.sub(r'^\%( )?', '', s) + """ + res = re.sub(r"^\%( )?", "", s) return res def _decode_relation(self, s): - '''(INTERNAL) Decodes a relation line. + """(INTERNAL) Decodes a relation line. The relation declaration is a line with the format ``@RELATION ``, where ``relation-name`` is a string. The string must @@ -700,18 +731,18 @@ def _decode_relation(self, s): :param s: a normalized string. :return: a string with the decoded relation name. - ''' - _, v = s.split(' ', 1) + """ + _, v = s.split(" ", 1) v = v.strip() if not _RE_RELATION.match(v): raise BadRelationFormat() - res = str(v.strip('"\'')) + res = str(v.strip("\"'")) return res def _decode_attribute(self, s): - '''(INTERNAL) Decodes an attribute line. + """(INTERNAL) Decodes an attribute line. The attribute is the most complex declaration in an arff file. 
All attributes must follow the template:: @@ -736,8 +767,8 @@ def _decode_attribute(self, s): :param s: a normalized string. :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES). - ''' - _, v = s.split(' ', 1) + """ + _, v = s.split(" ", 1) v = v.strip() # Verify the general structure of declaration @@ -749,12 +780,12 @@ def _decode_attribute(self, s): name, type_ = m.groups() # Extracts the final name - name = str(name.strip('"\'')) + name = str(name.strip("\"'")) # Extracts the final type if type_[:1] == "{" and type_[-1:] == "}": try: - type_ = _parse_values(type_.strip('{} ')) + type_ = _parse_values(type_.strip("{} ")) except Exception: raise BadAttributeType() if isinstance(type_, dict): @@ -763,27 +794,27 @@ def _decode_attribute(self, s): else: # If not nominal, verify the type name type_ = str(type_).upper() - if type_ not in ['NUMERIC', 'REAL', 'INTEGER', 'STRING']: + if type_ not in ["NUMERIC", "REAL", "INTEGER", "STRING"]: raise BadAttributeType() return (name, type_) def _decode(self, s, encode_nominal=False, matrix_type=DENSE): - '''Do the job the ``encode``.''' + """Do the job the ``encode``.""" # Make sure this method is idempotent self._current_line = 0 # If string, convert to a list of lines if isinstance(s, str): - s = s.strip('\r\n ').replace('\r\n', '\n').split('\n') + s = s.strip("\r\n ").replace("\r\n", "\n").split("\n") # Create the return object obj: ArffContainerType = { - 'description': '', - 'relation': '', - 'attributes': [], - 'data': [] + "description": "", + "relation": "", + "attributes": [], + "data": [], } attribute_names = {} @@ -796,14 +827,15 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): for row in s: self._current_line += 1 # Ignore empty lines - row = row.strip(' \r\n') - if not row: continue + row = row.strip(" \r\n") + if not row: + continue u_row = row.upper() # DESCRIPTION ----------------------------------------------------- if u_row.startswith(_TK_DESCRIPTION) and STATE == _TK_DESCRIPTION: - obj['description'] += self._decode_comment(row) + '\n' + obj["description"] += self._decode_comment(row) + "\n" # ----------------------------------------------------------------- # RELATION -------------------------------------------------------- @@ -812,7 +844,7 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): raise BadLayout() STATE = _TK_RELATION - obj['relation'] = self._decode_relation(row) + obj["relation"] = self._decode_relation(row) # ----------------------------------------------------------------- # ATTRIBUTE ------------------------------------------------------- @@ -827,7 +859,7 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): raise BadAttributeName(attr[0], attribute_names[attr[0]]) else: attribute_names[attr[0]] = self._current_line - obj['attributes'].append(attr) + obj["attributes"].append(attr) if isinstance(attr[1], (list, tuple)): if encode_nominal: @@ -835,10 +867,12 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): else: conversor = NominalConversor(attr[1]) else: - CONVERSOR_MAP = {'STRING': str, - 'INTEGER': lambda x: int(float(x)), - 'NUMERIC': float, - 'REAL': float} + CONVERSOR_MAP = { + "STRING": str, + "INTEGER": lambda x: int(float(x)), + "NUMERIC": float, + "REAL": float, + } conversor = CONVERSOR_MAP[attr[1]] self._conversors.append(conversor) @@ -869,14 +903,14 @@ def stream(): yield row # Alter the data object - obj['data'] = data.decode_rows(stream(), self._conversors) - if obj['description'].endswith('\n'): - obj['description'] = obj['description'][:-1] + 
obj["data"] = data.decode_rows(stream(), self._conversors) + if obj["description"].endswith("\n"): + obj["description"] = obj["description"][:-1] return obj def decode(self, s, encode_nominal=False, return_type=DENSE): - '''Returns the Python representation of a given ARFF file. + """Returns the Python representation of a given ARFF file. When a file object is passed as an argument, this method reads lines iteratively, avoiding to load unnecessary information to the memory. @@ -889,20 +923,21 @@ def decode(self, s, encode_nominal=False, return_type=DENSE): `arff.DENSE_GEN` or `arff.LOD_GEN`. Consult the sections on `working with sparse data`_ and `loading progressively`_. - ''' + """ try: - return self._decode(s, encode_nominal=encode_nominal, - matrix_type=return_type) + return self._decode( + s, encode_nominal=encode_nominal, matrix_type=return_type + ) except ArffException as e: e.line = self._current_line raise e class ArffEncoder: - '''An ARFF encoder.''' + """An ARFF encoder.""" - def _encode_comment(self, s=''): - '''(INTERNAL) Encodes a comment line. + def _encode_comment(self, s=""): + """(INTERNAL) Encodes a comment line. Comments are single line strings starting, obligatorily, with the ``%`` character, and can have any symbol, including whitespaces or special @@ -912,30 +947,30 @@ def _encode_comment(self, s=''): :param s: (OPTIONAL) string. :return: a string with the encoded comment line. - ''' + """ if s: - return '%s %s'%(_TK_COMMENT, s) + return "%s %s" % (_TK_COMMENT, s) else: - return '%s' % _TK_COMMENT + return "%s" % _TK_COMMENT def _encode_relation(self, name): - '''(INTERNAL) Decodes a relation line. + """(INTERNAL) Decodes a relation line. The relation declaration is a line with the format ``@RELATION ``, where ``relation-name`` is a string. :param name: a string. :return: a string with the encoded relation declaration. - ''' - for char in ' %{},': + """ + for char in " %{},": if char in name: - name = '"%s"'%name + name = '"%s"' % name break - return '%s %s'%(_TK_RELATION, name) + return "%s %s" % (_TK_RELATION, name) def _encode_attribute(self, name, type_): - '''(INTERNAL) Encodes an attribute line. + """(INTERNAL) Encodes an attribute line. The attribute follow the template:: @@ -956,94 +991,99 @@ def _encode_attribute(self, name, type_): :param name: a string. :param type_: a string or a list of string. :return: a string with the encoded attribute declaration. - ''' - for char in ' %{},': + """ + for char in " %{},": if char in name: - name = '"%s"'%name + name = '"%s"' % name break if isinstance(type_, (tuple, list)): - type_tmp = ['%s' % encode_string(type_k) for type_k in type_] - type_ = '{%s}'%(', '.join(type_tmp)) + type_tmp = ["%s" % encode_string(type_k) for type_k in type_] + type_ = "{%s}" % ", ".join(type_tmp) - return '%s %s %s'%(_TK_ATTRIBUTE, name, type_) + return "%s %s %s" % (_TK_ATTRIBUTE, name, type_) def encode(self, obj): - '''Encodes a given object to an ARFF file. + """Encodes a given object to an ARFF file. :param obj: the object containing the ARFF information. :return: the ARFF file as an string. - ''' + """ data = [row for row in self.iter_encode(obj)] - return '\n'.join(data) + return "\n".join(data) def iter_encode(self, obj): - '''The iterative version of `arff.ArffEncoder.encode`. + """The iterative version of `arff.ArffEncoder.encode`. This encodes iteratively a given object and return, one-by-one, the lines of the ARFF file. :param obj: the object containing the ARFF information. :return: (yields) the ARFF file as strings. 
- ''' + """ # DESCRIPTION - if obj.get('description', None): - for row in obj['description'].split('\n'): + if obj.get("description", None): + for row in obj["description"].split("\n"): yield self._encode_comment(row) # RELATION - if not obj.get('relation'): - raise BadObject('Relation name not found or with invalid value.') + if not obj.get("relation"): + raise BadObject("Relation name not found or with invalid value.") - yield self._encode_relation(obj['relation']) - yield '' + yield self._encode_relation(obj["relation"]) + yield "" # ATTRIBUTES - if not obj.get('attributes'): - raise BadObject('Attributes not found.') + if not obj.get("attributes"): + raise BadObject("Attributes not found.") attribute_names = set() - for attr in obj['attributes']: + for attr in obj["attributes"]: # Verify for bad object format - if not isinstance(attr, (tuple, list)) or \ - len(attr) != 2 or \ - not isinstance(attr[0], str): - raise BadObject('Invalid attribute declaration "%s"'%str(attr)) + if ( + not isinstance(attr, (tuple, list)) + or len(attr) != 2 + or not isinstance(attr[0], str) + ): + raise BadObject('Invalid attribute declaration "%s"' % str(attr)) if isinstance(attr[1], str): # Verify for invalid types if attr[1] not in _SIMPLE_TYPES: - raise BadObject('Invalid attribute type "%s"'%str(attr)) + raise BadObject('Invalid attribute type "%s"' % str(attr)) # Verify for bad object format elif not isinstance(attr[1], (tuple, list)): - raise BadObject('Invalid attribute type "%s"'%str(attr)) + raise BadObject('Invalid attribute type "%s"' % str(attr)) # Verify attribute name is not used twice if attr[0] in attribute_names: - raise BadObject('Trying to use attribute name "%s" for the ' - 'second time.' % str(attr[0])) + raise BadObject( + 'Trying to use attribute name "%s" for the second time.' + % str(attr[0]) + ) else: attribute_names.add(attr[0]) yield self._encode_attribute(attr[0], attr[1]) - yield '' - attributes = obj['attributes'] + yield "" + attributes = obj["attributes"] # DATA yield _TK_DATA - if 'data' in obj: - data = _get_data_object_for_encoding(obj.get('data')) - yield from data.encode_data(obj.get('data'), attributes) + if "data" in obj: + data = _get_data_object_for_encoding(obj.get("data")) + yield from data.encode_data(obj.get("data"), attributes) + + yield "" - yield '' # ============================================================================= # BASIC INTERFACE ============================================================= def load(fp, encode_nominal=False, return_type=DENSE): - '''Load a file-like object containing the ARFF document and convert it into + """Load a file-like object containing the ARFF document and convert it into a Python object. :param fp: a file-like object. @@ -1055,13 +1095,13 @@ def load(fp, encode_nominal=False, return_type=DENSE): Consult the sections on `working with sparse data`_ and `loading progressively`_. :return: a dictionary. - ''' + """ decoder = ArffDecoder() - return decoder.decode(fp, encode_nominal=encode_nominal, - return_type=return_type) + return decoder.decode(fp, encode_nominal=encode_nominal, return_type=return_type) + def loads(s, encode_nominal=False, return_type=DENSE): - '''Convert a string instance containing the ARFF document into a Python + """Convert a string instance containing the ARFF document into a Python object. :param s: a string object. @@ -1073,35 +1113,38 @@ def loads(s, encode_nominal=False, return_type=DENSE): Consult the sections on `working with sparse data`_ and `loading progressively`_. :return: a dictionary. 
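# A minimal round-trip sketch of the basic interface (loads/dumps). The
# top-level ``arff`` import name is an assumption: in scikit-learn this
# module ships as a vendored copy, so the real import path may differ.
#
#     import arff
#
#     doc = {
#         "description": "toy dataset",
#         "relation": "weather",
#         "attributes": [
#             ("temperature", "REAL"),
#             ("outlook", ["sunny", "rainy"]),
#         ],
#         "data": [[21.5, "sunny"], [12.0, "rainy"]],
#     }
#
#     text = arff.dumps(doc)    # dict -> ARFF string
#     again = arff.loads(text)  # ARFF string -> dict
#     assert again["relation"] == "weather"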
- ''' + """ decoder = ArffDecoder() - return decoder.decode(s, encode_nominal=encode_nominal, - return_type=return_type) + return decoder.decode(s, encode_nominal=encode_nominal, return_type=return_type) + def dump(obj, fp): - '''Serialize an object representing the ARFF document to a given file-like + """Serialize an object representing the ARFF document to a given file-like object. :param obj: a dictionary. :param fp: a file-like object. - ''' + """ encoder = ArffEncoder() generator = encoder.iter_encode(obj) last_row = next(generator) for row in generator: - fp.write(last_row + '\n') + fp.write(last_row + "\n") last_row = row fp.write(last_row) return fp + def dumps(obj): - '''Serialize an object representing the ARFF document, returning a string. + """Serialize an object representing the ARFF document, returning a string. :param obj: a dictionary. :return: a string with the ARFF document. - ''' + """ encoder = ArffEncoder() return encoder.encode(obj) + + # ============================================================================= diff --git a/sklearn/externals/_lobpcg.py b/sklearn/externals/_lobpcg.py index 1de3900b3f89c..cb96d8b296a3a 100644 --- a/sklearn/externals/_lobpcg.py +++ b/sklearn/externals/_lobpcg.py @@ -21,11 +21,11 @@ """ import warnings + import numpy as np -from scipy.linalg import (inv, eigh, cho_factor, cho_solve, - cholesky, LinAlgError) -from scipy.sparse.linalg import aslinearoperator from numpy import block as bmat +from scipy.linalg import LinAlgError, cho_factor, cho_solve, cholesky, eigh, inv +from scipy.sparse.linalg import aslinearoperator __all__ = ["lobpcg"] @@ -42,10 +42,12 @@ def _report_nonhermitian(M, name): tol = max(tol, tol * norm(M, 1)) if nmd > tol: warnings.warn( - f"Matrix {name} of the type {M.dtype} is not Hermitian: " - f"condition: {nmd} < {tol} fails.", - UserWarning, stacklevel=4 - ) + f"Matrix {name} of the type {M.dtype} is not Hermitian: " + f"condition: {nmd} < {tol} fails.", + UserWarning, + stacklevel=4, + ) + def _as2d(ar): """ @@ -121,7 +123,7 @@ def _get_indx(_lambda, num, largest): """Get `num` indices into `_lambda` depending on `largest` option.""" ii = np.argsort(_lambda) if largest: - ii = ii[:-num - 1:-1] + ii = ii[: -num - 1 : -1] else: ii = ii[:num] @@ -341,8 +343,9 @@ def lobpcg( warnings.warn( f"The problem size {n} minus the constraints size {sizeY} " f"is too small relative to the block size {sizeX}. " - f"Using a dense eigensolver instead of LOBPCG.", - UserWarning, stacklevel=2 + "Using a dense eigensolver instead of LOBPCG.", + UserWarning, + stacklevel=2, ) sizeX = min(sizeX, n) @@ -361,10 +364,7 @@ def lobpcg( A_dense = A(np.eye(n, dtype=A.dtype)) B_dense = None if B is None else B(np.eye(n, dtype=B.dtype)) - vals, vecs = eigh(A_dense, - B_dense, - eigvals=eigvals, - check_finite=False) + vals, vecs = eigh(A_dense, B_dense, eigvals=eigvals, check_finite=False) if largest: # Reverse order to be compatible with eigs() in 'LM' mode. vals = vals[::-1] @@ -438,7 +438,7 @@ def lobpcg( while iterationNumber < maxiter: iterationNumber += 1 if verbosityLevel > 0: - print("-"*50) + print("-" * 50) print(f"iteration {iterationNumber}") if B is not None: @@ -488,22 +488,17 @@ def lobpcg( ## # Apply constraints to the preconditioned residuals. if blockVectorY is not None: - _applyConstraints(activeBlockVectorR, - gramYBY, - blockVectorBY, - blockVectorY) + _applyConstraints(activeBlockVectorR, gramYBY, blockVectorBY, blockVectorY) ## # B-orthogonalize the preconditioned residuals to X. 
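# A self-contained numpy sketch of the projection applied below (the toy
# sizes and the diagonal B are assumptions): with X B-orthonormal, i.e.
# X.T @ B @ X == I, subtracting X @ (B @ X).T @ R leaves the residual
# block B-orthogonal to span(X).
#
#     import numpy as np
#
#     rng = np.random.default_rng(0)
#     n, k, m = 20, 3, 2
#     B = 2.0 * np.eye(n)                        # a simple SPD matrix
#     X, _ = np.linalg.qr(rng.standard_normal((n, k)))
#     X /= np.sqrt(2.0)                          # now X.T @ B @ X == I
#     R = rng.standard_normal((n, m))
#
#     R = R - X @ ((B @ X).T @ R)                # the update done here
#     assert np.allclose(X.T @ B @ R, 0.0, atol=1e-10)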
if B is not None: activeBlockVectorR = activeBlockVectorR - ( - blockVectorX @ - (blockVectorBX.T.conj() @ activeBlockVectorR) + blockVectorX @ (blockVectorBX.T.conj() @ activeBlockVectorR) ) else: activeBlockVectorR = activeBlockVectorR - ( - blockVectorX @ - (blockVectorX.T.conj() @ activeBlockVectorR) + blockVectorX @ (blockVectorX.T.conj() @ activeBlockVectorR) ) ## @@ -516,7 +511,8 @@ def lobpcg( f"Failed at iteration {iterationNumber} with accuracies " f"{residualNorms}\n not reaching the requested " f"tolerance {residualTolerance}.", - UserWarning, stacklevel=2 + UserWarning, + stacklevel=2, ) break activeBlockVectorAR = A(activeBlockVectorR) @@ -596,8 +592,7 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): gramRBP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP) if explicitGramFlag: gramPAP = (gramPAP + gramPAP.T.conj()) / 2 - gramPBP = np.dot(activeBlockVectorP.T.conj(), - activeBlockVectorBP) + gramPBP = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorBP) else: gramPBP = ident @@ -619,9 +614,7 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): _handle_gramA_gramB_verbosity(gramA, gramB) try: - _lambda, eigBlockVector = eigh(gramA, - gramB, - check_finite=False) + _lambda, eigBlockVector = eigh(gramA, gramB, check_finite=False) except LinAlgError: # try again after dropping the direction vectors P from RR restart = True @@ -633,9 +626,7 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): _handle_gramA_gramB_verbosity(gramA, gramB) try: - _lambda, eigBlockVector = eigh(gramA, - gramB, - check_finite=False) + _lambda, eigBlockVector = eigh(gramA, gramB, check_finite=False) except LinAlgError as e: raise ValueError("eigh has failed in lobpcg iterations") from e @@ -664,9 +655,8 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): if B is not None: if not restart: eigBlockVectorX = eigBlockVector[:sizeX] - eigBlockVectorR = eigBlockVector[sizeX: - sizeX + currentBlockSize] - eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:] + eigBlockVectorR = eigBlockVector[sizeX : sizeX + currentBlockSize] + eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize :] pp = np.dot(activeBlockVectorR, eigBlockVectorR) pp += np.dot(activeBlockVectorP, eigBlockVectorP) @@ -698,9 +688,8 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): else: if not restart: eigBlockVectorX = eigBlockVector[:sizeX] - eigBlockVectorR = eigBlockVector[sizeX: - sizeX + currentBlockSize] - eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:] + eigBlockVectorR = eigBlockVector[sizeX : sizeX + currentBlockSize] + eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize :] pp = np.dot(activeBlockVectorR, eigBlockVectorR) pp += np.dot(activeBlockVectorP, eigBlockVectorP) @@ -739,7 +728,8 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): f"Exited at iteration {iterationNumber} with accuracies \n" f"{residualNorms}\n" f"not reaching the requested tolerance {residualTolerance}.", - UserWarning, stacklevel=2 + UserWarning, + stacklevel=2, ) # Future work: Need to add Postprocessing here: diff --git a/sklearn/externals/_numpy_compiler_patch.py b/sklearn/externals/_numpy_compiler_patch.py index a424d8e99a8ef..7bee1b66c83f9 100644 --- a/sklearn/externals/_numpy_compiler_patch.py +++ b/sklearn/externals/_numpy_compiler_patch.py @@ -29,9 +29,9 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import os -import sys -import subprocess import re +import subprocess +import sys from distutils.errors import DistutilsExecError from numpy.distutils import log diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py index a9c1496181b3b..f4db85303f4b6 100644 --- a/sklearn/feature_extraction/__init__.py +++ b/sklearn/feature_extraction/__init__.py @@ -4,10 +4,10 @@ images. """ +from . import text from ._dict_vectorizer import DictVectorizer from ._hash import FeatureHasher -from .image import img_to_graph, grid_to_graph -from . import text +from .image import grid_to_graph, img_to_graph __all__ = [ "DictVectorizer", diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index e04c409027bda..b4afb797bd240 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -3,9 +3,9 @@ # License: BSD 3 clause from array import array -from collections.abc import Mapping, Iterable -from operator import itemgetter +from collections.abc import Iterable, Mapping from numbers import Number +from operator import itemgetter import numpy as np import scipy.sparse as sp diff --git a/sklearn/feature_extraction/_hashing_fast.pyx b/sklearn/feature_extraction/_hashing_fast.pyx index 48dbd928a03d3..0a64f94442d88 100644 --- a/sklearn/feature_extraction/_hashing_fast.pyx +++ b/sklearn/feature_extraction/_hashing_fast.pyx @@ -1,17 +1,19 @@ # Author: Lars Buitinck # License: BSD 3 clause -import sys import array +import sys + cimport cython +cimport numpy as cnp from libc.stdlib cimport abs from libcpp.vector cimport vector -cimport numpy as cnp import numpy as np + from ..utils._typedefs cimport INT32TYPE_t, INT64TYPE_t -from ..utils.murmurhash cimport murmurhash3_bytes_s32 from ..utils._vector_sentinel cimport vector_to_nd_array +from ..utils.murmurhash cimport murmurhash3_bytes_s32 cnp.import_array() diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py index 9c330f593dbdc..2bbe1f432d10b 100644 --- a/sklearn/feature_extraction/image.py +++ b/sklearn/feature_extraction/image.py @@ -9,14 +9,15 @@ # Vlad Niculae # License: BSD 3 clause -from itertools import product import numbers +from itertools import product + import numpy as np -from scipy import sparse from numpy.lib.stride_tricks import as_strided +from scipy import sparse -from ..utils import check_array, check_random_state from ..base import BaseEstimator +from ..utils import check_array, check_random_state __all__ = [ "PatchExtractor", diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index ebdb0f084e67d..119ed5a98af02 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -3,12 +3,11 @@ # License: BSD 3 clause from random import Random -import numpy as np -import scipy.sparse as sp -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose +import numpy as np import pytest +import scipy.sparse as sp +from numpy.testing import assert_allclose, assert_array_equal from sklearn.feature_extraction import DictVectorizer from sklearn.feature_selection import SelectKBest, chi2 diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index 79ec2922e16d8..2bdfdb75e405b 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py 
+++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -1,6 +1,6 @@ import numpy as np -from numpy.testing import assert_array_equal import pytest +from numpy.testing import assert_array_equal from sklearn.feature_extraction import FeatureHasher from sklearn.feature_extraction._hashing_fast import transform as _hashing_transform diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 41348a3535693..9d900f584aa97 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -3,18 +3,18 @@ # License: BSD 3 clause import numpy as np +import pytest import scipy as sp from scipy import ndimage from scipy.sparse.csgraph import connected_components -import pytest from sklearn.feature_extraction.image import ( - img_to_graph, - grid_to_graph, - extract_patches_2d, - reconstruct_from_patches_2d, PatchExtractor, _extract_patches, + extract_patches_2d, + grid_to_graph, + img_to_graph, + reconstruct_from_patches_2d, ) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index b46958c36002e..6d4a438b437ee 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -1,43 +1,37 @@ -from collections.abc import Mapping +import pickle import re +import warnings +from collections import defaultdict +from collections.abc import Mapping +from functools import partial +from io import StringIO +import numpy as np import pytest -import warnings +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy import sparse -from sklearn.feature_extraction.text import strip_tags -from sklearn.feature_extraction.text import strip_accents_unicode -from sklearn.feature_extraction.text import strip_accents_ascii - -from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_extraction.text import TfidfTransformer -from sklearn.feature_extraction.text import TfidfVectorizer - -from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import GridSearchCV +from sklearn.base import clone +from sklearn.feature_extraction.text import ( + ENGLISH_STOP_WORDS, + CountVectorizer, + HashingVectorizer, + TfidfTransformer, + TfidfVectorizer, + strip_accents_ascii, + strip_accents_unicode, + strip_tags, +) +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC - -from sklearn.base import clone - -import numpy as np -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal from sklearn.utils import IS_PYPY from sklearn.utils._testing import ( + assert_allclose_dense_sparse, assert_almost_equal, fails_if_pypy, - assert_allclose_dense_sparse, skip_if_32bit, ) -from collections import defaultdict -from functools import partial -import pickle -from io import StringIO JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index b565aeadc53c8..46deab05c25fa 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -12,27 +12,26 @@ """ import array -from collections import defaultdict -from collections.abc import 
Mapping -from functools import partial import numbers -from operator import itemgetter import re import unicodedata import warnings +from collections import defaultdict +from collections.abc import Mapping +from functools import partial +from operator import itemgetter import numpy as np import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin, _OneToOneFeatureMixin +from ..exceptions import NotFittedError from ..preprocessing import normalize +from ..utils import _IS_32BIT +from ..utils.deprecation import deprecated +from ..utils.validation import FLOAT_DTYPES, check_array, check_is_fitted, check_scalar from ._hash import FeatureHasher from ._stop_words import ENGLISH_STOP_WORDS -from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES, check_scalar -from ..utils.deprecation import deprecated -from ..utils import _IS_32BIT -from ..exceptions import NotFittedError - __all__ = [ "HashingVectorizer", diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index ce5fbc10ee459..4fbc631155078 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -4,31 +4,25 @@ recursive feature elimination algorithm. """ -from ._univariate_selection import chi2 -from ._univariate_selection import f_classif -from ._univariate_selection import f_oneway -from ._univariate_selection import f_regression -from ._univariate_selection import r_regression -from ._univariate_selection import SelectPercentile -from ._univariate_selection import SelectKBest -from ._univariate_selection import SelectFpr -from ._univariate_selection import SelectFdr -from ._univariate_selection import SelectFwe -from ._univariate_selection import GenericUnivariateSelect - -from ._variance_threshold import VarianceThreshold - -from ._rfe import RFE -from ._rfe import RFECV - +from ._base import SelectorMixin from ._from_model import SelectFromModel - +from ._mutual_info import mutual_info_classif, mutual_info_regression +from ._rfe import RFE, RFECV from ._sequential import SequentialFeatureSelector - -from ._mutual_info import mutual_info_regression, mutual_info_classif - -from ._base import SelectorMixin - +from ._univariate_selection import ( + GenericUnivariateSelect, + SelectFdr, + SelectFpr, + SelectFwe, + SelectKBest, + SelectPercentile, + chi2, + f_classif, + f_oneway, + f_regression, + r_regression, +) +from ._variance_threshold import VarianceThreshold __all__ = [ "GenericUnivariateSelect", diff --git a/sklearn/feature_selection/_base.py b/sklearn/feature_selection/_base.py index e306c102cdd53..50ab88540aa1d 100644 --- a/sklearn/feature_selection/_base.py +++ b/sklearn/feature_selection/_base.py @@ -8,15 +8,11 @@ from operator import attrgetter import numpy as np -from scipy.sparse import issparse, csc_matrix +from scipy.sparse import csc_matrix, issparse from ..base import TransformerMixin from ..cross_decomposition._pls import _PLS -from ..utils import ( - check_array, - safe_mask, - safe_sqr, -) +from ..utils import check_array, safe_mask, safe_sqr from ..utils._tags import _safe_tags from ..utils.validation import _check_feature_names_in diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py index 0c41c66fbef1f..9f163dcdc419d 100644 --- a/sklearn/feature_selection/_from_model.py +++ b/sklearn/feature_selection/_from_model.py @@ -1,19 +1,17 @@ # Authors: Gilles Louppe, Mathieu Blondel, Maheshakya Wijewardena # License: BSD 3 clause +import numbers from copy import deepcopy 
import numpy as np -import numbers - -from ._base import SelectorMixin -from ._base import _get_feature_importances -from ..base import BaseEstimator, clone, MetaEstimatorMixin -from ..utils._tags import _safe_tags -from ..utils.validation import check_is_fitted, check_scalar, _num_features +from ..base import BaseEstimator, MetaEstimatorMixin, clone from ..exceptions import NotFittedError +from ..utils._tags import _safe_tags from ..utils.metaestimators import available_if +from ..utils.validation import _num_features, check_is_fitted, check_scalar +from ._base import SelectorMixin, _get_feature_importances def _calculate_threshold(estimator, importances, threshold): diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index 47db2601c44c0..c5529c0276783 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -6,11 +6,11 @@ from scipy.special import digamma from ..metrics.cluster import mutual_info_score -from ..neighbors import NearestNeighbors, KDTree +from ..neighbors import KDTree, NearestNeighbors from ..preprocessing import scale from ..utils import check_random_state -from ..utils.validation import check_array, check_X_y from ..utils.multiclass import check_classification_targets +from ..utils.validation import check_array, check_X_y def _compute_mi_cc(x, y, n_neighbors): diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 0f82e1775ee15..546660a14fee8 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -6,26 +6,22 @@ """Recursive feature elimination for feature ranking""" -import numpy as np import numbers -from joblib import Parallel, effective_n_jobs +import numpy as np -from ..utils.metaestimators import available_if -from ..utils.metaestimators import _safe_split -from ..utils._tags import _safe_tags -from ..utils.validation import check_is_fitted -from ..utils.fixes import delayed -from ..utils.deprecation import deprecated -from ..base import BaseEstimator -from ..base import MetaEstimatorMixin -from ..base import clone -from ..base import is_classifier +from joblib import Parallel, effective_n_jobs + +from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier +from ..metrics import check_scoring from ..model_selection import check_cv from ..model_selection._validation import _score -from ..metrics import check_scoring -from ._base import SelectorMixin -from ._base import _get_feature_importances +from ..utils._tags import _safe_tags +from ..utils.deprecation import deprecated +from ..utils.fixes import delayed +from ..utils.metaestimators import _safe_split, available_if +from ..utils.validation import check_is_fitted +from ._base import SelectorMixin, _get_feature_importances def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer): diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index ed802c46b815c..6b5886aba5ff0 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -2,16 +2,15 @@ Sequential feature selection """ import numbers +import warnings import numpy as np -import warnings - -from ._base import SelectorMixin from ..base import BaseEstimator, MetaEstimatorMixin, clone +from ..model_selection import cross_val_score from ..utils._tags import _safe_tags from ..utils.validation import check_is_fitted -from ..model_selection import cross_val_score +from ._base import SelectorMixin class 
SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator): diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index 7754ea3bea7f4..5356cb136dc1e 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -5,16 +5,16 @@ # License: BSD 3 clause -import numpy as np import warnings +import numpy as np from scipy import special, stats from scipy.sparse import issparse from ..base import BaseEstimator from ..preprocessing import LabelBinarizer -from ..utils import as_float_array, check_array, check_X_y, safe_sqr, safe_mask -from ..utils.extmath import safe_sparse_dot, row_norms +from ..utils import as_float_array, check_array, check_X_y, safe_mask, safe_sqr +from ..utils.extmath import row_norms, safe_sparse_dot from ..utils.validation import check_is_fitted from ._base import SelectorMixin diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 7f274b3a308ef..2b3b1def8b41f 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -2,10 +2,11 @@ # License: 3-clause BSD import numpy as np + from ..base import BaseEstimator -from ._base import SelectorMixin from ..utils.sparsefuncs import mean_variance_axis, min_max_axis from ..utils.validation import check_is_fitted +from ._base import SelectorMixin class VarianceThreshold(SelectorMixin, BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index 9df0749427976..4a45e185a62bc 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -1,8 +1,7 @@ import numpy as np import pytest -from scipy import sparse as sp - from numpy.testing import assert_array_equal +from scipy import sparse as sp from sklearn.base import BaseEstimator from sklearn.feature_selection._base import SelectorMixin diff --git a/sklearn/feature_selection/tests/test_chi2.py b/sklearn/feature_selection/tests/test_chi2.py index d7d830459e455..4fdc652a998a9 100644 --- a/sklearn/feature_selection/tests/test_chi2.py +++ b/sklearn/feature_selection/tests/test_chi2.py @@ -7,13 +7,12 @@ import numpy as np import pytest -from scipy.sparse import coo_matrix, csr_matrix import scipy.stats +from scipy.sparse import coo_matrix, csr_matrix from sklearn.feature_selection import SelectKBest, chi2 from sklearn.feature_selection._univariate_selection import _chisquare -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal # Feature 0 is highly informative for class 1; # feature 1 is the same everywhere; diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 815d8a35201d5..9227261ee1241 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -3,35 +3,36 @@ """ import itertools import warnings -import numpy as np -from numpy.testing import assert_allclose -from scipy import stats, sparse +import numpy as np import pytest - -from sklearn.utils._testing import assert_almost_equal, _convert_container -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from 
sklearn.utils._testing import ignore_warnings -from sklearn.utils import safe_mask +from numpy.testing import assert_allclose +from scipy import sparse, stats from sklearn.datasets import make_classification, make_regression from sklearn.feature_selection import ( + GenericUnivariateSelect, + SelectFdr, + SelectFpr, + SelectFwe, + SelectKBest, + SelectPercentile, chi2, f_classif, f_oneway, f_regression, - GenericUnivariateSelect, mutual_info_classif, mutual_info_regression, r_regression, - SelectPercentile, - SelectKBest, - SelectFpr, - SelectFdr, - SelectFwe, ) - +from sklearn.utils import safe_mask +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) ############################################################################## # Test the score functions diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index de45d9e0ab6a4..830569dd8ec66 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,27 +1,33 @@ import re -import pytest -import numpy as np import warnings from unittest.mock import Mock -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import skip_if_32bit -from sklearn.utils._testing import MinimalClassifier +import numpy as np +import pytest from sklearn import datasets +from sklearn.base import BaseEstimator from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression from sklearn.datasets import make_friedman1 +from sklearn.decomposition import PCA +from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier from sklearn.exceptions import NotFittedError -from sklearn.linear_model import LogisticRegression, SGDClassifier, Lasso -from sklearn.svm import LinearSVC from sklearn.feature_selection import SelectFromModel -from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.base import BaseEstimator +from sklearn.linear_model import ( + Lasso, + LogisticRegression, + PassiveAggressiveClassifier, + SGDClassifier, +) from sklearn.pipeline import make_pipeline -from sklearn.decomposition import PCA +from sklearn.svm import LinearSVC +from sklearn.utils._testing import ( + MinimalClassifier, + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) class NaNTag(BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index af2b733efd62d..cf98d5d54e83b 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -2,13 +2,10 @@ import pytest from scipy.sparse import csr_matrix -from sklearn.utils import check_random_state -from sklearn.utils._testing import ( - assert_array_equal, - assert_allclose, -) +from sklearn.feature_selection import mutual_info_classif, mutual_info_regression from sklearn.feature_selection._mutual_info import _compute_mi -from sklearn.feature_selection import mutual_info_regression, mutual_info_classif +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_allclose, assert_array_equal def test_compute_mi_dd(): diff --git 
a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 75b84a8d2cbb1..ad1420732a3c5 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -4,31 +4,26 @@ from operator import attrgetter -import pytest import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose +import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal from scipy import sparse from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA -from sklearn.feature_selection import RFE, RFECV +from sklearn.compose import TransformedTargetRegressor +from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression from sklearn.datasets import load_iris, make_friedman1 -from sklearn.metrics import zero_one_loss -from sklearn.svm import SVC, SVR, LinearSVR -from sklearn.linear_model import LogisticRegression from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import GroupKFold -from sklearn.compose import TransformedTargetRegressor +from sklearn.feature_selection import RFE, RFECV +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import get_scorer, make_scorer, zero_one_loss +from sklearn.model_selection import GroupKFold, cross_val_score from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler - +from sklearn.svm import SVC, SVR, LinearSVR from sklearn.utils import check_random_state from sklearn.utils._testing import ignore_warnings -from sklearn.metrics import make_scorer -from sklearn.metrics import get_scorer - class MockClassifier: """ @@ -303,8 +298,8 @@ def test_rfecv_mockclassifier(): def test_rfecv_verbose_output(): # Check verbose=1 is producing an output. 
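# The capture pattern this test exercises, as a generic sketch (the
# print() stands in for a verbose RFECV(...).fit(X, y) call; the names
# are illustrative):
#
#     import sys
#     from io import StringIO
#
#     buf, old = StringIO(), sys.stdout
#     sys.stdout = buf
#     try:
#         print("Fitting estimator with 4 features.")
#     finally:
#         sys.stdout = old
#     assert len(buf.getvalue()) > 0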
- from io import StringIO import sys + from io import StringIO sys.stdout = StringIO() diff --git a/sklearn/feature_selection/tests/test_sequential.py b/sklearn/feature_selection/tests/test_sequential.py index 3daac62e19922..3196dac0f1192 100644 --- a/sklearn/feature_selection/tests/test_sequential.py +++ b/sklearn/feature_selection/tests/test_sequential.py @@ -1,16 +1,16 @@ +import numpy as np import pytest import scipy -import numpy as np from numpy.testing import assert_array_equal -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline +from sklearn.cluster import KMeans +from sklearn.datasets import make_blobs, make_regression +from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.feature_selection import SequentialFeatureSelector -from sklearn.datasets import make_regression, make_blobs from sklearn.linear_model import LinearRegression -from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.model_selection import cross_val_score -from sklearn.cluster import KMeans +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler @pytest.mark.parametrize("n_features_to_select", (0, 5, 0.0, -1, 1.1)) diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 55d20e9675654..fe4cde7dcdb93 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -1,11 +1,9 @@ import numpy as np import pytest - -from sklearn.utils._testing import assert_array_equal - from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix from sklearn.feature_selection import VarianceThreshold +from sklearn.utils._testing import assert_array_equal data = [[0, 1, 2, 3, 4], [0, 2, 2, 3, 5], [1, 1, 2, 4, 0]] diff --git a/sklearn/gaussian_process/__init__.py b/sklearn/gaussian_process/__init__.py index 719208b7951be..bc0d902b45b18 100644 --- a/sklearn/gaussian_process/__init__.py +++ b/sklearn/gaussian_process/__init__.py @@ -8,9 +8,8 @@ based regression and classification. """ -from ._gpr import GaussianProcessRegressor -from ._gpc import GaussianProcessClassifier from . import kernels - +from ._gpc import GaussianProcessClassifier +from ._gpr import GaussianProcessRegressor __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"] diff --git a/sklearn/gaussian_process/_gpc.py b/sklearn/gaussian_process/_gpc.py index 061aa95287500..20ce80c9d47d4 100644 --- a/sklearn/gaussian_process/_gpc.py +++ b/sklearn/gaussian_process/_gpc.py @@ -7,18 +7,18 @@ from operator import itemgetter import numpy as np -from scipy.linalg import cholesky, cho_solve, solve import scipy.optimize +from scipy.linalg import cho_solve, cholesky, solve from scipy.special import erf, expit from ..base import BaseEstimator, ClassifierMixin, clone -from .kernels import RBF, CompoundKernel, ConstantKernel as C -from ..utils.validation import check_is_fitted +from ..multiclass import OneVsOneClassifier, OneVsRestClassifier +from ..preprocessing import LabelEncoder from ..utils import check_random_state from ..utils.optimize import _check_optimize_result -from ..preprocessing import LabelEncoder -from ..multiclass import OneVsRestClassifier, OneVsOneClassifier - +from ..utils.validation import check_is_fitted +from .kernels import RBF, CompoundKernel +from .kernels import ConstantKernel as C # Values required for approximating the logistic sigmoid by # error functions. 
coefs are obtained via: diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index c7d8db7b63702..bf68636a334f9 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -8,15 +8,15 @@ from operator import itemgetter import numpy as np -from scipy.linalg import cholesky, cho_solve, solve_triangular import scipy.optimize +from scipy.linalg import cho_solve, cholesky, solve_triangular -from ..base import BaseEstimator, RegressorMixin, clone -from ..base import MultiOutputMixin -from .kernels import RBF, ConstantKernel as C +from ..base import BaseEstimator, MultiOutputMixin, RegressorMixin, clone from ..preprocessing._data import _handle_zeros_in_scale from ..utils import check_random_state from ..utils.optimize import _check_optimize_result +from .kernels import RBF +from .kernels import ConstantKernel as C GPR_CHOLESKY_LOWER = True diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 4e36dfa7add42..2d9bcc8974558 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -19,21 +19,20 @@ # Note: this module is strongly inspired by the kernel module of the george # package. +import math +import warnings from abc import ABCMeta, abstractmethod from collections import namedtuple -import math from inspect import signature import numpy as np -from scipy.special import kv, gamma -from scipy.spatial.distance import pdist, cdist, squareform +from scipy.spatial.distance import cdist, pdist, squareform +from scipy.special import gamma, kv -from ..metrics.pairwise import pairwise_kernels from ..base import clone -from ..utils.validation import _num_samples from ..exceptions import ConvergenceWarning - -import warnings +from ..metrics.pairwise import pairwise_kernels +from ..utils.validation import _num_samples def _check_length_scale(X, length_scale): diff --git a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py index ad81890680168..4667329aff9b8 100644 --- a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py +++ b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py @@ -1,8 +1,12 @@ -from sklearn.gaussian_process.kernels import Kernel, Hyperparameter -from sklearn.gaussian_process.kernels import GenericKernelMixin -from sklearn.gaussian_process.kernels import StationaryKernelMixin import numpy as np + from sklearn.base import clone +from sklearn.gaussian_process.kernels import ( + GenericKernelMixin, + Hyperparameter, + Kernel, + StationaryKernelMixin, +) class MiniSeqKernel(GenericKernelMixin, StationaryKernelMixin, Kernel): diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index 2173f77c161c1..777e61d544f23 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -4,22 +4,17 @@ # License: BSD 3 clause import warnings -import numpy as np - -from scipy.optimize import approx_fprime +import numpy as np import pytest +from scipy.optimize import approx_fprime +from sklearn.exceptions import ConvergenceWarning from sklearn.gaussian_process import GaussianProcessClassifier -from sklearn.gaussian_process.kernels import ( - RBF, - CompoundKernel, - ConstantKernel as C, - WhiteKernel, -) +from sklearn.gaussian_process.kernels import RBF, CompoundKernel +from sklearn.gaussian_process.kernels import ConstantKernel as C +from sklearn.gaussian_process.kernels import WhiteKernel from 
sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel -from sklearn.exceptions import ConvergenceWarning - from sklearn.utils._testing import assert_almost_equal, assert_array_equal diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index a8e6eda3bf667..a7bb42e314966 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -4,25 +4,25 @@ # Modified by: Pete Green # License: BSD 3 clause -import warnings -import sys import re -import numpy as np - -from scipy.optimize import approx_fprime +import sys +import warnings +import numpy as np import pytest +from scipy.optimize import approx_fprime +from sklearn.exceptions import ConvergenceWarning from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, WhiteKernel -from sklearn.gaussian_process.kernels import DotProduct, ExpSineSquared +from sklearn.gaussian_process.kernels import RBF +from sklearn.gaussian_process.kernels import ConstantKernel as C +from sklearn.gaussian_process.kernels import DotProduct, ExpSineSquared, WhiteKernel from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel -from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ( - assert_array_less, + assert_allclose, assert_almost_equal, assert_array_almost_equal, - assert_allclose, + assert_array_less, ) diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py index 05dc682b13e9c..8248ad6ffba80 100644 --- a/sklearn/gaussian_process/tests/test_kernels.py +++ b/sklearn/gaussian_process/tests/test_kernels.py @@ -3,41 +3,39 @@ # Author: Jan Hendrik Metzen # License: BSD 3 clause -import pytest -import numpy as np from inspect import signature -from sklearn.gaussian_process.kernels import _approx_fprime +import numpy as np +import pytest -from sklearn.metrics.pairwise import ( - PAIRWISE_KERNEL_FUNCTIONS, - euclidean_distances, - pairwise_kernels, -) +from sklearn.base import clone from sklearn.gaussian_process.kernels import ( RBF, + CompoundKernel, + ConstantKernel, + DotProduct, + Exponentiation, + ExpSineSquared, + KernelOperator, Matern, + PairwiseKernel, RationalQuadratic, - ExpSineSquared, - DotProduct, - ConstantKernel, WhiteKernel, - PairwiseKernel, - KernelOperator, - Exponentiation, - CompoundKernel, + _approx_fprime, +) +from sklearn.metrics.pairwise import ( + PAIRWISE_KERNEL_FUNCTIONS, + euclidean_distances, + pairwise_kernels, ) -from sklearn.base import clone - from sklearn.utils._testing import ( + assert_allclose, assert_almost_equal, - assert_array_equal, assert_array_almost_equal, - assert_allclose, + assert_array_equal, fails_if_pypy, ) - X = np.random.RandomState(0).normal(0, 1, (5, 2)) Y = np.random.RandomState(0).normal(0, 1, (6, 2)) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 0c8a6f2c07a21..7ecf58c4f28b6 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -12,13 +12,10 @@ from scipy import stats from ..base import BaseEstimator, TransformerMixin -from ..utils.sparsefuncs import _get_median -from ..utils.validation import check_is_fitted -from ..utils.validation import FLOAT_DTYPES -from ..utils.validation import _check_feature_names_in +from ..utils import _is_pandas_na, is_scalar_nan from ..utils._mask import _get_mask -from ..utils import _is_pandas_na -from ..utils import is_scalar_nan +from ..utils.sparsefuncs 
import _get_median +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted def _check_inputs_dtype(X, missing_values): diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index f6c32a6818455..6802a5494a93f 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -1,22 +1,17 @@ -from time import time -from collections import namedtuple import warnings +from collections import namedtuple +from time import time -from scipy import stats import numpy as np +from scipy import stats from ..base import clone from ..exceptions import ConvergenceWarning from ..preprocessing import normalize -from ..utils import check_array, check_random_state, _safe_indexing, is_scalar_nan -from ..utils.validation import FLOAT_DTYPES, check_is_fitted -from ..utils.validation import _check_feature_names_in +from ..utils import _safe_indexing, check_array, check_random_state, is_scalar_nan from ..utils._mask import _get_mask - -from ._base import _BaseImputer -from ._base import SimpleImputer -from ._base import _check_inputs_dtype - +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted +from ._base import SimpleImputer, _BaseImputer, _check_inputs_dtype _ImputerTriplet = namedtuple( "_ImputerTriplet", ["feat_idx", "neighbor_feat_idx", "estimator"] diff --git a/sklearn/impute/_knn.py b/sklearn/impute/_knn.py index 497bcfafb074a..da65eccb006b8 100644 --- a/sklearn/impute/_knn.py +++ b/sklearn/impute/_knn.py @@ -4,16 +4,13 @@ import numpy as np -from ._base import _BaseImputer -from ..utils.validation import FLOAT_DTYPES from ..metrics import pairwise_distances_chunked from ..metrics.pairwise import _NAN_METRICS -from ..neighbors._base import _get_weights -from ..neighbors._base import _check_weights +from ..neighbors._base import _check_weights, _get_weights from ..utils import is_scalar_nan from ..utils._mask import _get_mask -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_feature_names_in +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted +from ._base import _BaseImputer class KNNImputer(_BaseImputer): diff --git a/sklearn/impute/tests/test_base.py b/sklearn/impute/tests/test_base.py index 837575765f884..b841a440582fc 100644 --- a/sklearn/impute/tests/test_base.py +++ b/sklearn/impute/tests/test_base.py @@ -1,6 +1,5 @@ -import pytest - import numpy as np +import pytest from sklearn.impute._base import _BaseImputer from sklearn.utils._mask import _get_mask diff --git a/sklearn/impute/tests/test_common.py b/sklearn/impute/tests/test_common.py index 6d6fc3c649656..ec6675e88941a 100644 --- a/sklearn/impute/tests/test_common.py +++ b/sklearn/impute/tests/test_common.py @@ -1,18 +1,14 @@ -import pytest - import numpy as np +import pytest from scipy import sparse -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal - from sklearn.experimental import enable_iterative_imputer # noqa - -from sklearn.impute import IterativeImputer -from sklearn.impute import KNNImputer -from sklearn.impute import SimpleImputer - +from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_array_equal, +) IMPUTERS = [IterativeImputer(tol=0.1), KNNImputer(), SimpleImputer()] SPARSE_IMPUTERS = [SimpleImputer()] diff --git 
a/sklearn/impute/tests/test_impute.py b/sklearn/impute/tests/test_impute.py index dc585571124b5..3b78ffe90a930 100644 --- a/sklearn/impute/tests/test_impute.py +++ b/sklearn/impute/tests/test_impute.py @@ -1,32 +1,30 @@ -import pytest +import io import warnings import numpy as np +import pytest from scipy import sparse from scipy.stats import kstest -import io - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal +from sklearn import tree +from sklearn.datasets import load_diabetes +from sklearn.dummy import DummyRegressor +from sklearn.exceptions import ConvergenceWarning # make IterativeImputer available from sklearn.experimental import enable_iterative_imputer # noqa - -from sklearn.datasets import load_diabetes -from sklearn.impute import MissingIndicator -from sklearn.impute import SimpleImputer, IterativeImputer -from sklearn.dummy import DummyRegressor -from sklearn.linear_model import BayesianRidge, ARDRegression, RidgeCV -from sklearn.pipeline import Pipeline -from sklearn.pipeline import make_union +from sklearn.impute import IterativeImputer, MissingIndicator, SimpleImputer +from sklearn.impute._base import _most_frequent +from sklearn.linear_model import ARDRegression, BayesianRidge, RidgeCV from sklearn.model_selection import GridSearchCV -from sklearn import tree +from sklearn.pipeline import Pipeline, make_union from sklearn.random_projection import _sparse_random_matrix -from sklearn.exceptions import ConvergenceWarning -from sklearn.impute._base import _most_frequent +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_array_almost_equal, + assert_array_equal, +) def _assert_array_equal_and_same_dtype(x, y): diff --git a/sklearn/impute/tests/test_knn.py b/sklearn/impute/tests/test_knn.py index 098899bc1a0f1..8c0ca9cad557d 100644 --- a/sklearn/impute/tests/test_knn.py +++ b/sklearn/impute/tests/test_knn.py @@ -3,8 +3,7 @@ from sklearn import config_context from sklearn.impute import KNNImputer -from sklearn.metrics.pairwise import nan_euclidean_distances -from sklearn.metrics.pairwise import pairwise_distances +from sklearn.metrics.pairwise import nan_euclidean_distances, pairwise_distances from sklearn.neighbors import KNeighborsRegressor from sklearn.utils._testing import assert_allclose diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index 76c44ea81bbbe..e9213d5abb77b 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,13 +1,10 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" +from ._partial_dependence import partial_dependence from ._permutation_importance import permutation_importance from ._plot.decision_boundary import DecisionBoundaryDisplay - -from ._partial_dependence import partial_dependence -from ._plot.partial_dependence import plot_partial_dependence -from ._plot.partial_dependence import PartialDependenceDisplay - +from ._plot.partial_dependence import PartialDependenceDisplay, plot_partial_dependence __all__ = [ "partial_dependence", diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index ebb7a11e16835..1d2100391f02f 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -12,22 +12,23 @@ from scipy.stats.mstats import mquantiles from ..base 
import is_classifier, is_regressor -from ..utils.extmath import cartesian -from ..utils import check_array -from ..utils import check_matplotlib_support # noqa -from ..utils import _safe_indexing -from ..utils import _determine_key_type -from ..utils import _get_column_indices -from ..utils.validation import check_is_fitted -from ..utils import Bunch -from ..tree import DecisionTreeRegressor from ..ensemble import RandomForestRegressor -from ..exceptions import NotFittedError from ..ensemble._gb import BaseGradientBoosting from ..ensemble._hist_gradient_boosting.gradient_boosting import ( BaseHistGradientBoosting, ) - +from ..exceptions import NotFittedError +from ..tree import DecisionTreeRegressor +from ..utils import check_matplotlib_support # noqa +from ..utils import ( + Bunch, + _determine_key_type, + _get_column_indices, + _safe_indexing, + check_array, +) +from ..utils.extmath import cartesian +from ..utils.validation import check_is_fitted __all__ = [ "partial_dependence", diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py index 204dcd9117c77..808190591ad6e 100644 --- a/sklearn/inspection/_permutation_importance.py +++ b/sklearn/inspection/_permutation_importance.py @@ -1,15 +1,15 @@ """Permutation importance for estimators.""" import numbers + import numpy as np + from joblib import Parallel from ..ensemble._bagging import _generate_indices from ..metrics import check_scoring from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer from ..model_selection._validation import _aggregate_score_dicts -from ..utils import Bunch, _safe_indexing -from ..utils import check_random_state -from ..utils import check_array +from ..utils import Bunch, _safe_indexing, check_array, check_random_state from ..utils.fixes import delayed diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 15e8e15e87569..0bb22ce61ad4d 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -2,11 +2,10 @@ import numpy as np -from ...preprocessing import LabelEncoder -from ...utils import check_matplotlib_support -from ...utils import _safe_indexing from ...base import is_regressor -from ...utils.validation import check_is_fitted, _is_arraylike_not_scalar +from ...preprocessing import LabelEncoder +from ...utils import _safe_indexing, check_matplotlib_support +from ...utils.validation import _is_arraylike_not_scalar, check_is_fitted def _check_boundary_response_method(estimator, response_method): diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index 1ab0cd083bb58..211880fa2e9c2 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -6,17 +6,14 @@ import numpy as np from scipy import sparse from scipy.stats.mstats import mquantiles + from joblib import Parallel -from .. import partial_dependence from ...base import is_regressor -from ...utils import Bunch -from ...utils import check_array -from ...utils import deprecated from ...utils import check_matplotlib_support # noqa -from ...utils import check_random_state -from ...utils import _safe_indexing +from ...utils import Bunch, _safe_indexing, check_array, check_random_state, deprecated from ...utils.fixes import delayed +from .. 
import partial_dependence @deprecated( diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index 8981c9d5a5e83..0f197e019f949 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -1,21 +1,19 @@ import warnings -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.datasets import make_classification -from sklearn.linear_model import LogisticRegression -from sklearn.datasets import load_iris -from sklearn.datasets import make_multilabel_classification -from sklearn.tree import DecisionTreeRegressor -from sklearn.tree import DecisionTreeClassifier - +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.datasets import ( + load_iris, + make_classification, + make_multilabel_classification, +) from sklearn.inspection import DecisionBoundaryDisplay from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method - +from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py index 65f5ce83d7a06..0bf6217f0707e 100644 --- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py @@ -1,22 +1,22 @@ -import numpy as np -from scipy.stats.mstats import mquantiles +import warnings +import numpy as np import pytest from numpy.testing import assert_allclose -import warnings +from scipy.stats.mstats import mquantiles -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification, make_regression -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.ensemble import GradientBoostingClassifier +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor +from sklearn.inspection import PartialDependenceDisplay +from sklearn.inspection import plot_partial_dependence as plot_partial_dependence_func from sklearn.linear_model import LinearRegression from sklearn.utils._testing import _convert_container -from sklearn.inspection import plot_partial_dependence as plot_partial_dependence_func -from sklearn.inspection import PartialDependenceDisplay - - # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:" diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 4e62f140c6953..7bf47a850ce19 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -6,40 +6,39 @@ import pytest import sklearn +from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.cluster import KMeans +from sklearn.compose import make_column_transformer +from sklearn.datasets import load_iris, 
make_classification, make_regression +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ( + GradientBoostingClassifier, + GradientBoostingRegressor, + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.exceptions import NotFittedError from sklearn.inspection import partial_dependence from sklearn.inspection._partial_dependence import ( _grid_from_X, _partial_dependence_brute, _partial_dependence_recursion, ) -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import MultiTaskLasso -from sklearn.tree import DecisionTreeRegressor -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification, make_regression -from sklearn.cluster import KMeans -from sklearn.compose import make_column_transformer +from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso from sklearn.metrics import r2_score -from sklearn.preprocessing import PolynomialFeatures -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import RobustScaler -from sklearn.preprocessing import scale from sklearn.pipeline import make_pipeline -from sklearn.dummy import DummyClassifier -from sklearn.base import BaseEstimator, ClassifierMixin, clone -from sklearn.exceptions import NotFittedError -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal +from sklearn.preprocessing import ( + PolynomialFeatures, + RobustScaler, + StandardScaler, + scale, +) +from sklearn.tree import DecisionTreeRegressor +from sklearn.tree.tests.test_tree import assert_is_subtree from sklearn.utils import _IS_32BIT +from sklearn.utils._testing import assert_allclose, assert_array_equal from sklearn.utils.validation import check_random_state -from sklearn.tree.tests.test_tree import assert_is_subtree - # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py index 20d0c289a9a7d..219df01f0b4f9 100644 --- a/sklearn/inspection/tests/test_permutation_importance.py +++ b/sklearn/inspection/tests/test_permutation_importance.py @@ -1,31 +1,23 @@ -import pytest import numpy as np - +import pytest from numpy.testing import assert_allclose from sklearn.compose import ColumnTransformer -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) from sklearn.dummy import DummyClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import RandomForestClassifier -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.impute import SimpleImputer from sklearn.inspection import permutation_importance +from sklearn.linear_model import LinearRegression, LogisticRegression +from 
sklearn.metrics import get_scorer, mean_squared_error, r2_score from sklearn.model_selection import train_test_split -from sklearn.metrics import ( - get_scorer, - mean_squared_error, - r2_score, -) from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import KBinsDiscretizer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import scale +from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale from sklearn.utils import parallel_backend from sklearn.utils._testing import _convert_container diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index db19a52daf867..48ce181b481cc 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -3,17 +3,17 @@ # Nelle Varoquaux # License: BSD 3 clause +import math +import warnings + import numpy as np from scipy import interpolate from scipy.stats import spearmanr -import warnings -import math -from .base import BaseEstimator, TransformerMixin, RegressorMixin +from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique +from .base import BaseEstimator, RegressorMixin, TransformerMixin from .utils import check_array, check_consistent_length from .utils.validation import _check_sample_weight -from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique - __all__ = ["check_increasing", "isotonic_regression", "IsotonicRegression"] diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 1e4f4c6aa1301..46ce0fbcd3e29 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -19,15 +19,15 @@ except ImportError: # scipy < 1.4 from scipy.fftpack import fft, ifft -from .base import BaseEstimator -from .base import TransformerMixin -from .base import _ClassNamePrefixFeaturesOutMixin +from .base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin +from .metrics.pairwise import KERNEL_PARAMS, pairwise_kernels from .utils import check_random_state from .utils.extmath import safe_sparse_dot -from .utils.validation import check_is_fitted -from .utils.validation import _check_feature_names_in -from .metrics.pairwise import pairwise_kernels, KERNEL_PARAMS -from .utils.validation import check_non_negative +from .utils.validation import ( + _check_feature_names_in, + check_is_fitted, + check_non_negative, +) class PolynomialCountSketch( diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py index cc83e114338be..ba32c1c0365aa 100644 --- a/sklearn/kernel_ridge.py +++ b/sklearn/kernel_ridge.py @@ -6,10 +6,10 @@ import numpy as np -from .base import BaseEstimator, RegressorMixin, MultiOutputMixin -from .metrics.pairwise import pairwise_kernels +from .base import BaseEstimator, MultiOutputMixin, RegressorMixin from .linear_model._ridge import _solve_cholesky_kernel -from .utils.validation import check_is_fitted, _check_sample_weight +from .metrics.pairwise import pairwise_kernels +from .utils.validation import _check_sample_weight, check_is_fitted class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator): diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index d5a14756c41a9..45c99d4d36df1 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -7,46 +7,44 @@ # complete documentation. 
from ._base import LinearRegression -from ._bayes import BayesianRidge, ARDRegression -from ._least_angle import ( - Lars, - LassoLars, - lars_path, - lars_path_gram, - LarsCV, - LassoLarsCV, - LassoLarsIC, -) +from ._bayes import ARDRegression, BayesianRidge from ._coordinate_descent import ( - Lasso, ElasticNet, - LassoCV, ElasticNetCV, - lasso_path, - enet_path, - MultiTaskLasso, + Lasso, + LassoCV, MultiTaskElasticNet, MultiTaskElasticNetCV, + MultiTaskLasso, MultiTaskLassoCV, + enet_path, + lasso_path, ) -from ._glm import PoissonRegressor, GammaRegressor, TweedieRegressor +from ._glm import GammaRegressor, PoissonRegressor, TweedieRegressor from ._huber import HuberRegressor -from ._sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber -from ._stochastic_gradient import SGDClassifier, SGDRegressor, SGDOneClassSVM -from ._ridge import Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV, ridge_regression +from ._least_angle import ( + Lars, + LarsCV, + LassoLars, + LassoLarsCV, + LassoLarsIC, + lars_path, + lars_path_gram, +) from ._logistic import LogisticRegression, LogisticRegressionCV from ._omp import ( - orthogonal_mp, - orthogonal_mp_gram, OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV, + orthogonal_mp, + orthogonal_mp_gram, ) -from ._passive_aggressive import PassiveAggressiveClassifier -from ._passive_aggressive import PassiveAggressiveRegressor +from ._passive_aggressive import PassiveAggressiveClassifier, PassiveAggressiveRegressor from ._perceptron import Perceptron - from ._quantile import QuantileRegressor from ._ransac import RANSACRegressor +from ._ridge import Ridge, RidgeClassifier, RidgeClassifierCV, RidgeCV, ridge_regression +from ._sgd_fast import Hinge, Huber, Log, ModifiedHuber, SquaredLoss +from ._stochastic_gradient import SGDClassifier, SGDOneClassSVM, SGDRegressor from ._theil_sen import TheilSenRegressor __all__ = [ diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 5b23c346cbc5f..d6458961a4314 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -14,31 +14,31 @@ # Maria Telenczuk # License: BSD 3 clause -from abc import ABCMeta, abstractmethod import numbers import warnings +from abc import ABCMeta, abstractmethod import numpy as np import scipy.sparse as sp -from scipy import linalg -from scipy import optimize -from scipy import sparse +from scipy import linalg, optimize, sparse from scipy.sparse.linalg import lsqr from scipy.special import expit + from joblib import Parallel -from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin +from ..base import BaseEstimator, ClassifierMixin, MultiOutputMixin, RegressorMixin from ..preprocessing._data import _is_constant_feature -from ..utils import check_array -from ..utils.validation import FLOAT_DTYPES -from ..utils import check_random_state -from ..utils.extmath import safe_sparse_dot -from ..utils.extmath import _incremental_mean_and_var -from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale -from ..utils._seq_dataset import ArrayDataset32, CSRDataset32 -from ..utils._seq_dataset import ArrayDataset64, CSRDataset64 -from ..utils.validation import check_is_fitted, _check_sample_weight +from ..utils import check_array, check_random_state +from ..utils._seq_dataset import ( + ArrayDataset32, + ArrayDataset64, + CSRDataset32, + CSRDataset64, +) +from ..utils.extmath import _incremental_mean_and_var, safe_sparse_dot from ..utils.fixes import delayed +from ..utils.sparsefuncs import 
inplace_column_scale, mean_variance_axis +from ..utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted # TODO: bayesian_ridge_regression and bayesian_regression_ard # should be squashed into its respective objects. diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index e9a88d6e2a65b..33a81e48aa3ce 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -6,16 +6,15 @@ # License: BSD 3 clause from math import log + import numpy as np from scipy import linalg +from scipy.linalg import pinvh -from ._base import LinearModel, _preprocess_data, _rescale_data from ..base import RegressorMixin -from ._base import _deprecate_normalize from ..utils.extmath import fast_logdet -from scipy.linalg import pinvh from ..utils.validation import _check_sample_weight - +from ._base import LinearModel, _deprecate_normalize, _preprocess_data, _rescale_data ############################################################################### # BayesianRidge regression diff --git a/sklearn/linear_model/_cd_fast.pyx b/sklearn/linear_model/_cd_fast.pyx index 4c605c2911740..19bfa0197da3f 100644 --- a/sklearn/linear_model/_cd_fast.pyx +++ b/sklearn/linear_model/_cd_fast.pyx @@ -6,21 +6,33 @@ # # License: BSD 3 clause -from libc.math cimport fabs cimport numpy as cnp +from libc.math cimport fabs + import numpy as np import numpy.linalg as linalg from cpython cimport bool from cython cimport floating -import warnings -from ..exceptions import ConvergenceWarning -from ..utils._cython_blas cimport (_axpy, _dot, _asum, _ger, _gemv, _nrm2, - _copy, _scal) -from ..utils._cython_blas cimport RowMajor, ColMajor, Trans, NoTrans +import warnings +from ..exceptions import ConvergenceWarning +from ..utils._cython_blas cimport ( + ColMajor, + NoTrans, + RowMajor, + Trans, + _asum, + _axpy, + _copy, + _dot, + _gemv, + _ger, + _nrm2, + _scal, +) from ..utils._random cimport our_rand_r ctypedef cnp.float64_t DOUBLE diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index 779af3626e18d..2a9628943e466 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -5,34 +5,33 @@ # # License: BSD 3 clause +import numbers import sys import warnings -import numbers from abc import ABC, abstractmethod from functools import partial import numpy as np from scipy import sparse + from joblib import Parallel, effective_n_jobs -from ._base import LinearModel, _pre_fit -from ..base import RegressorMixin, MultiOutputMixin -from ._base import _preprocess_data, _deprecate_normalize -from ..utils import check_array -from ..utils import check_scalar -from ..utils.validation import check_random_state +from ..base import MultiOutputMixin, RegressorMixin from ..model_selection import check_cv +from ..utils import check_array, check_scalar from ..utils.extmath import safe_sparse_dot +from ..utils.fixes import delayed from ..utils.validation import ( _check_sample_weight, check_consistent_length, check_is_fitted, + check_random_state, column_or_1d, ) -from ..utils.fixes import delayed # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast' from . 
import _cd_fast as cd_fast # type: ignore +from ._base import LinearModel, _deprecate_normalize, _pre_fit, _preprocess_data def _set_order(X, y, order="C"): diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py index fea9c4d4cf6ba..1b82bbd77bcf9 100644 --- a/sklearn/linear_model/_glm/__init__.py +++ b/sklearn/linear_model/_glm/__init__.py @@ -1,10 +1,10 @@ # License: BSD 3 clause from .glm import ( - _GeneralizedLinearRegressor, - PoissonRegressor, GammaRegressor, + PoissonRegressor, TweedieRegressor, + _GeneralizedLinearRegressor, ) __all__ = [ diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index d337eaa7a4a18..8f05fca1d1361 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -20,10 +20,10 @@ HalfTweedieLossIdentity, ) from ...base import BaseEstimator, RegressorMixin -from ...utils.optimize import _check_optimize_result -from ...utils import check_scalar, check_array, deprecated -from ...utils.validation import check_is_fitted, _check_sample_weight +from ...utils import check_array, check_scalar, deprecated from ...utils._openmp_helpers import _openmp_effective_n_threads +from ...utils.optimize import _check_optimize_result +from ...utils.validation import _check_sample_weight, check_is_fitted from .._linear_loss import LinearModelLoss diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index d29fde2eb30d7..e3817037f9dcb 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -3,20 +3,24 @@ # License: BSD 3 clause import re +import warnings + import numpy as np -from numpy.testing import assert_allclose import pytest -import warnings +from numpy.testing import assert_allclose -from sklearn.base import clone from sklearn._loss.glm_distribution import TweedieDistribution from sklearn._loss.link import IdentityLink, LogLink - +from sklearn.base import clone from sklearn.datasets import make_regression -from sklearn.linear_model._glm import _GeneralizedLinearRegressor -from sklearn.linear_model import TweedieRegressor, PoissonRegressor, GammaRegressor -from sklearn.linear_model import Ridge from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import ( + GammaRegressor, + PoissonRegressor, + Ridge, + TweedieRegressor, +) +from sklearn.linear_model._glm import _GeneralizedLinearRegressor from sklearn.metrics import d2_tweedie_score from sklearn.model_selection import train_test_split diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py index 3fdf5aa73743f..d72346faed2e4 100644 --- a/sklearn/linear_model/_huber.py +++ b/sklearn/linear_model/_huber.py @@ -2,15 +2,14 @@ # License: BSD 3 clause import numpy as np - from scipy import optimize from ..base import BaseEstimator, RegressorMixin -from ._base import LinearModel from ..utils import axis0_safe_slice -from ..utils.validation import _check_sample_weight from ..utils.extmath import safe_sparse_dot from ..utils.optimize import _check_optimize_result +from ..utils.validation import _check_sample_weight +from ._base import LinearModel def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None): diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index 22284cd71d0ff..08a3ddb01ceac 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -8,25 +8,24 @@ # # License: BSD 3 clause -from math import log 
import sys import warnings +from math import log import numpy as np -from scipy import linalg, interpolate +from scipy import interpolate, linalg from scipy.linalg.lapack import get_lapack_funcs + from joblib import Parallel -from ._base import LinearModel, LinearRegression -from ._base import _deprecate_normalize, _preprocess_data -from ..base import RegressorMixin, MultiOutputMixin +from ..base import MultiOutputMixin, RegressorMixin +from ..exceptions import ConvergenceWarning +from ..model_selection import check_cv # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs' -from ..utils import arrayfuncs, as_float_array # type: ignore -from ..utils import check_random_state -from ..model_selection import check_cv -from ..exceptions import ConvergenceWarning +from ..utils import arrayfuncs, as_float_array, check_random_state # type: ignore from ..utils.fixes import delayed +from ._base import LinearModel, LinearRegression, _deprecate_normalize, _preprocess_data SOLVE_TRIANGULAR_ARGS = {"check_finite": False} diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py index 64a99325dcd7a..0ed2e148fecdc 100644 --- a/sklearn/linear_model/_linear_loss.py +++ b/sklearn/linear_model/_linear_loss.py @@ -3,6 +3,7 @@ """ import numpy as np from scipy import sparse + from ..utils.extmath import squared_norm diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 72b602e409801..2df456b0af5f3 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -15,25 +15,28 @@ import numpy as np from scipy import optimize + from joblib import Parallel, effective_n_jobs -from ._base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator -from ._linear_loss import LinearModelLoss -from ._sag import sag_solver from .._loss.loss import HalfBinomialLoss, HalfMultinomialLoss -from ..preprocessing import LabelEncoder, LabelBinarizer +from ..metrics import get_scorer +from ..model_selection import check_cv +from ..preprocessing import LabelBinarizer, LabelEncoder from ..svm._base import _fit_liblinear -from ..utils import check_array, check_consistent_length, compute_class_weight -from ..utils import check_random_state -from ..utils.extmath import softmax -from ..utils.extmath import row_norms -from ..utils.optimize import _newton_cg, _check_optimize_result -from ..utils.validation import check_is_fitted, _check_sample_weight -from ..utils.multiclass import check_classification_targets +from ..utils import ( + check_array, + check_consistent_length, + check_random_state, + compute_class_weight, +) +from ..utils.extmath import row_norms, softmax from ..utils.fixes import delayed -from ..model_selection import check_cv -from ..metrics import get_scorer - +from ..utils.multiclass import check_classification_targets +from ..utils.optimize import _check_optimize_result, _newton_cg +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._base import BaseEstimator, LinearClassifierMixin, SparseCoefMixin +from ._linear_loss import LinearModelLoss +from ._sag import sag_solver _LOGISTIC_SOLVER_CONVERGENCE_MSG = ( "Please also refer to the documentation for alternative solver options:\n" @@ -872,8 +875,7 @@ class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): .. seealso:: Refer to the User Guide for more information regarding :class:`LogisticRegression` and more specifically the - `Table `_ - summarazing solver/penalty supports. 
+ :ref:`Table ` summarizing solver/penalty supports. .. versionadded:: 0.17 Stochastic Average Gradient descent solver. diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py index b86c35c41de85..d70d309ab157f 100644 --- a/sklearn/linear_model/_omp.py +++ b/sklearn/linear_model/_omp.py @@ -11,13 +11,14 @@ import numpy as np from scipy import linalg from scipy.linalg.lapack import get_lapack_funcs + from joblib import Parallel -from ._base import LinearModel, _pre_fit, _deprecate_normalize -from ..base import RegressorMixin, MultiOutputMixin +from ..base import MultiOutputMixin, RegressorMixin +from ..model_selection import check_cv from ..utils import as_float_array, check_array from ..utils.fixes import delayed -from ..model_selection import check_cv +from ._base import LinearModel, _deprecate_normalize, _pre_fit premature = ( "Orthogonal matching pursuit ended prematurely due to linear" diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py index 65f754ba35f55..5c010d9069599 100644 --- a/sklearn/linear_model/_passive_aggressive.py +++ b/sklearn/linear_model/_passive_aggressive.py @@ -1,9 +1,7 @@ # Authors: Rob Zinkov, Mathieu Blondel # License: BSD 3 clause -from ._stochastic_gradient import BaseSGDClassifier -from ._stochastic_gradient import BaseSGDRegressor -from ._stochastic_gradient import DEFAULT_EPSILON +from ._stochastic_gradient import DEFAULT_EPSILON, BaseSGDClassifier, BaseSGDRegressor class PassiveAggressiveClassifier(BaseSGDClassifier): diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py index bc5d59e6fca0c..628f7b46c81b3 100644 --- a/sklearn/linear_model/_quantile.py +++ b/sklearn/linear_model/_quantile.py @@ -8,11 +8,11 @@ from scipy.optimize import linprog from ..base import BaseEstimator, RegressorMixin -from ._base import LinearModel from ..exceptions import ConvergenceWarning from ..utils import _safe_indexing +from ..utils.fixes import parse_version, sp_version from ..utils.validation import _check_sample_weight -from ..utils.fixes import sp_version, parse_version +from ._base import LinearModel class QuantileRegressor(LinearModel, RegressorMixin, BaseEstimator): diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index 8d20005430769..4d51551e7b667 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -2,17 +2,22 @@ # # License: BSD 3 clause -import numpy as np import warnings -from ..base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone -from ..base import MultiOutputMixin -from ..utils import check_random_state, check_consistent_length +import numpy as np + +from ..base import ( + BaseEstimator, + MetaEstimatorMixin, + MultiOutputMixin, + RegressorMixin, + clone, +) +from ..exceptions import ConvergenceWarning +from ..utils import check_consistent_length, check_random_state from ..utils.random import sample_without_replacement -from ..utils.validation import check_is_fitted, _check_sample_weight +from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter from ._base import LinearRegression -from ..utils.validation import has_fit_parameter -from ..exceptions import ConvergenceWarning _EPSILON = np.spacing(1) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index dee703b73c059..2314e88c3bb7a 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -9,35 +9,38 @@ # License: BSD 3 clause +import numbers +import warnings 
from abc import ABCMeta, abstractmethod from functools import partial -import warnings import numpy as np -import numbers -from scipy import linalg -from scipy import sparse -from scipy import optimize +from scipy import linalg, optimize, sparse from scipy.sparse import linalg as sp_linalg -from ._base import LinearClassifierMixin, LinearModel -from ._base import _deprecate_normalize, _preprocess_data, _rescale_data -from ._sag import sag_solver from ..base import MultiOutputMixin, RegressorMixin, is_classifier -from ..utils.extmath import safe_sparse_dot -from ..utils.extmath import row_norms -from ..utils import check_array -from ..utils import check_consistent_length -from ..utils import check_scalar -from ..utils import compute_sample_weight -from ..utils import column_or_1d -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..preprocessing import LabelBinarizer -from ..model_selection import GridSearchCV -from ..metrics import check_scoring from ..exceptions import ConvergenceWarning +from ..metrics import check_scoring +from ..model_selection import GridSearchCV +from ..preprocessing import LabelBinarizer +from ..utils import ( + check_array, + check_consistent_length, + check_scalar, + column_or_1d, + compute_sample_weight, +) +from ..utils.extmath import row_norms, safe_sparse_dot from ..utils.sparsefuncs import mean_variance_axis +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._base import ( + LinearClassifierMixin, + LinearModel, + _deprecate_normalize, + _preprocess_data, + _rescale_data, +) +from ._sag import sag_solver def _get_rescaled_operator(X, X_offset, sample_weight_sqrt): diff --git a/sklearn/linear_model/_sag.py b/sklearn/linear_model/_sag.py index b7860edd43031..2626955ec2a7f 100644 --- a/sklearn/linear_model/_sag.py +++ b/sklearn/linear_model/_sag.py @@ -8,12 +8,12 @@ import numpy as np -from ._base import make_dataset -from ._sag_fast import sag32, sag64 from ..exceptions import ConvergenceWarning from ..utils import check_array -from ..utils.validation import _check_sample_weight from ..utils.extmath import row_norms +from ..utils.validation import _check_sample_weight +from ._base import make_dataset +from ._sag_fast import sag32, sag64 def get_auto_step_size( diff --git a/sklearn/linear_model/_sgd_fast.pyx b/sklearn/linear_model/_sgd_fast.pyx index bcb1d05d3a8be..3e76d4c2c60b1 100644 --- a/sklearn/linear_model/_sgd_fast.pyx +++ b/sklearn/linear_model/_sgd_fast.pyx @@ -6,18 +6,21 @@ # License: BSD 3 clause -import numpy as np import sys from time import time -from libc.math cimport exp, log, sqrt, pow, fabs +import numpy as np + cimport numpy as cnp +from libc.math cimport exp, fabs, log, pow, sqrt from numpy.math cimport INFINITY + + cdef extern from "_sgd_fast_helpers.h": bint skl_isfinite(double) nogil -from ..utils._weight_vector cimport WeightVector64 as WeightVector from ..utils._seq_dataset cimport SequentialDataset64 as SequentialDataset +from ..utils._weight_vector cimport WeightVector64 as WeightVector cnp.import_array() diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index a4c129d101ef1..59babd074e9b2 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -6,36 +6,34 @@ Descent (SGD). 
""" -import numpy as np import warnings - from abc import ABCMeta, abstractmethod +import numpy as np + from joblib import Parallel -from ..base import clone, is_classifier -from ._base import LinearClassifierMixin, SparseCoefMixin -from ._base import make_dataset -from ..base import BaseEstimator, RegressorMixin, OutlierMixin -from ..utils import check_random_state -from ..utils.metaestimators import available_if +from ..base import BaseEstimator, OutlierMixin, RegressorMixin, clone, is_classifier +from ..exceptions import ConvergenceWarning +from ..model_selection import ShuffleSplit, StratifiedShuffleSplit +from ..utils import check_random_state, compute_class_weight from ..utils.extmath import safe_sparse_dot -from ..utils.multiclass import _check_partial_fit_first_call -from ..utils.validation import check_is_fitted, _check_sample_weight from ..utils.fixes import delayed -from ..exceptions import ConvergenceWarning -from ..model_selection import StratifiedShuffleSplit, ShuffleSplit - -from ._sgd_fast import _plain_sgd -from ..utils import compute_class_weight -from ._sgd_fast import Hinge -from ._sgd_fast import SquaredHinge -from ._sgd_fast import Log -from ._sgd_fast import ModifiedHuber -from ._sgd_fast import SquaredLoss -from ._sgd_fast import Huber -from ._sgd_fast import EpsilonInsensitive -from ._sgd_fast import SquaredEpsilonInsensitive +from ..utils.metaestimators import available_if +from ..utils.multiclass import _check_partial_fit_first_call +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._base import LinearClassifierMixin, SparseCoefMixin, make_dataset +from ._sgd_fast import ( + EpsilonInsensitive, + Hinge, + Huber, + Log, + ModifiedHuber, + SquaredEpsilonInsensitive, + SquaredHinge, + SquaredLoss, + _plain_sgd, +) LEARNING_RATE_TYPES = { "constant": 1, diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index b51c5d8695e01..51f341f932ee7 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -7,22 +7,23 @@ # License: BSD 3 clause -import warnings import numbers +import warnings from itertools import combinations import numpy as np from scipy import linalg -from scipy.special import binom from scipy.linalg.lapack import get_lapack_funcs +from scipy.special import binom + from joblib import Parallel, effective_n_jobs -from ._base import LinearModel from ..base import RegressorMixin +from ..exceptions import ConvergenceWarning from ..utils import check_random_state -from ..utils.validation import check_scalar from ..utils.fixes import delayed -from ..exceptions import ConvergenceWarning +from ..utils.validation import check_scalar +from ._base import LinearModel _EPSILON = np.finfo(np.double).eps diff --git a/sklearn/linear_model/setup.py b/sklearn/linear_model/setup.py index 74d7d9e2b05ea..ce02400af7736 100644 --- a/sklearn/linear_model/setup.py +++ b/sklearn/linear_model/setup.py @@ -1,4 +1,5 @@ import os + import numpy from sklearn._build_utils import gen_from_templates diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 26433109b334d..301c17ccc63b5 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -4,28 +4,27 @@ # # License: BSD 3 clause -import pytest import warnings import numpy as np -from scipy import sparse -from scipy import linalg - -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from 
sklearn.utils._testing import assert_allclose -from sklearn.utils import check_random_state +import pytest +from scipy import linalg, sparse +from sklearn.datasets import load_iris, make_regression, make_sparse_uncorrelated from sklearn.linear_model import LinearRegression -from sklearn.linear_model._base import _deprecate_normalize -from sklearn.linear_model._base import _preprocess_data -from sklearn.linear_model._base import _rescale_data -from sklearn.linear_model._base import make_dataset -from sklearn.datasets import make_sparse_uncorrelated -from sklearn.datasets import make_regression -from sklearn.datasets import load_iris -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import add_dummy_feature +from sklearn.linear_model._base import ( + _deprecate_normalize, + _preprocess_data, + _rescale_data, + make_dataset, +) +from sklearn.preprocessing import StandardScaler, add_dummy_feature +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) rng = np.random.RandomState(0) rtol = 1e-6 diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 4044aefc3e446..02273717cd968 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -8,14 +8,14 @@ import numpy as np import pytest - -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils import check_random_state -from sklearn.linear_model import BayesianRidge, ARDRegression -from sklearn.linear_model import Ridge from sklearn import datasets +from sklearn.linear_model import ARDRegression, BayesianRidge, Ridge +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_less, +) from sklearn.utils.extmath import fast_logdet diabetes = datasets.load_diabetes() diff --git a/sklearn/linear_model/tests/test_common.py b/sklearn/linear_model/tests/test_common.py index 49e506227ccfa..06dee1a00706d 100644 --- a/sklearn/linear_model/tests/test_common.py +++ b/sklearn/linear_model/tests/test_common.py @@ -2,23 +2,24 @@ # # License: BSD 3 clause -import pytest - import sys import warnings + import numpy as np +import pytest from sklearn.base import is_classifier -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import Ridge -from sklearn.linear_model import RidgeCV -from sklearn.linear_model import RidgeClassifier -from sklearn.linear_model import RidgeClassifierCV -from sklearn.linear_model import BayesianRidge -from sklearn.linear_model import ARDRegression - -from sklearn.utils.fixes import np_version, parse_version +from sklearn.linear_model import ( + ARDRegression, + BayesianRidge, + LinearRegression, + Ridge, + RidgeClassifier, + RidgeClassifierCV, + RidgeCV, +) from sklearn.utils import check_random_state +from sklearn.utils.fixes import np_version, parse_version @pytest.mark.parametrize( diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index e5d7ba358c1f5..2b93931a63d9a 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -2,51 +2,28 @@ # Alexandre Gramfort # License: BSD 3 clause +import warnings +from copy import deepcopy + 
import numpy as np import pytest -import warnings from scipy import interpolate, sparse -from copy import deepcopy -import joblib -from sklearn.base import is_classifier -from sklearn.base import clone -from sklearn.datasets import load_diabetes -from sklearn.datasets import make_regression -from sklearn.model_selection import ( - GridSearchCV, - LeaveOneGroupOut, - train_test_split, -) -from sklearn.pipeline import make_pipeline -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler +import joblib +from sklearn.base import clone, is_classifier +from sklearn.datasets import load_diabetes, make_regression from sklearn.exceptions import ConvergenceWarning -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import _convert_container - -from sklearn.utils._testing import TempMemmap -from sklearn.utils import check_random_state -from sklearn.utils.sparsefuncs import mean_variance_axis - from sklearn.linear_model import ( ARDRegression, BayesianRidge, ElasticNet, ElasticNetCV, - enet_path, Lars, - lars_path, Lasso, LassoCV, LassoLars, LassoLarsCV, LassoLarsIC, - lasso_path, LinearRegression, MultiTaskElasticNet, MultiTaskElasticNetCV, @@ -57,11 +34,25 @@ RidgeClassifier, RidgeClassifierCV, RidgeCV, + enet_path, + lars_path, + lasso_path, ) - from sklearn.linear_model._coordinate_descent import _set_order -from sklearn.utils import check_array - +from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut, train_test_split +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.utils import check_array, check_random_state +from sklearn.utils._testing import ( + TempMemmap, + _convert_container, + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.sparsefuncs import mean_variance_axis # FIXME: 'normalize' to be removed in 1.2 filterwarnings_normalize = pytest.mark.filterwarnings( @@ -359,8 +350,8 @@ def test_lasso_cv(): def test_lasso_cv_with_some_model_selection(): - from sklearn.model_selection import ShuffleSplit from sklearn import datasets + from sklearn.model_selection import ShuffleSplit diabetes = datasets.load_diabetes() X = diabetes.data diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py index 88a5d096772b3..d2552d1b990fd 100644 --- a/sklearn/linear_model/tests/test_huber.py +++ b/sklearn/linear_model/tests/test_huber.py @@ -4,13 +4,14 @@ import numpy as np from scipy import optimize, sparse -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - from sklearn.datasets import make_regression -from sklearn.linear_model import HuberRegressor, LinearRegression, SGDRegressor, Ridge +from sklearn.linear_model import HuberRegressor, LinearRegression, Ridge, SGDRegressor from sklearn.linear_model._huber import _huber_loss_and_gradient +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def make_regression_with_outliers(n_samples=50, n_features=20): diff --git a/sklearn/linear_model/tests/test_least_angle.py 
b/sklearn/linear_model/tests/test_least_angle.py index db5a3dfd114b9..a7db721f645e4 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -3,20 +3,29 @@ import numpy as np import pytest from scipy import linalg + +from sklearn import datasets, linear_model from sklearn.base import clone +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import ( + Lars, + LarsCV, + LassoLars, + LassoLarsCV, + LassoLarsIC, + lars_path, +) +from sklearn.linear_model._least_angle import _lars_path_residues from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import TempMemmap from sklearn.utils import check_random_state -from sklearn.exceptions import ConvergenceWarning -from sklearn import linear_model, datasets -from sklearn.linear_model._least_angle import _lars_path_residues -from sklearn.linear_model import LassoLarsIC, lars_path -from sklearn.linear_model import Lars, LassoLars, LarsCV, LassoLarsCV +from sklearn.utils._testing import ( + TempMemmap, + assert_allclose, + assert_array_almost_equal, + ignore_warnings, +) # TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() @@ -59,8 +68,8 @@ def test_simple(): # Principle of Lars is to keep covariances tied and decreasing # also test verbose output - from io import StringIO import sys + from io import StringIO old_stdout = sys.stdout try: diff --git a/sklearn/linear_model/tests/test_linear_loss.py b/sklearn/linear_model/tests/test_linear_loss.py index d4e20ad69ca8a..bfe874389ffc8 100644 --- a/sklearn/linear_model/tests/test_linear_loss.py +++ b/sklearn/linear_model/tests/test_linear_loss.py @@ -4,21 +4,16 @@ Note that correctness of losses (which compose LinearModelLoss) is already well covered in the _loss module. """ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from scipy import linalg, optimize, sparse -from sklearn._loss.loss import ( - HalfBinomialLoss, - HalfMultinomialLoss, - HalfPoissonLoss, -) +from sklearn._loss.loss import HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss from sklearn.datasets import make_low_rank_matrix from sklearn.linear_model._linear_loss import LinearModelLoss from sklearn.utils.extmath import squared_norm - # We do not need to test all losses, just what LinearModelLoss does on top of the # base losses. 
LOSSES = [HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss] diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 5bb2b83094290..2b7f2a327d7b1 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -1,36 +1,37 @@ import itertools import os import re -import numpy as np -from numpy.testing import assert_allclose, assert_almost_equal -from numpy.testing import assert_array_almost_equal, assert_array_equal -from scipy import sparse +import numpy as np import pytest +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) +from scipy import sparse from sklearn.base import clone from sklearn.datasets import load_iris, make_classification -from sklearn.metrics import log_loss -from sklearn.metrics import get_scorer -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import train_test_split -from sklearn.model_selection import cross_val_score -from sklearn.preprocessing import LabelEncoder, StandardScaler -from sklearn.utils import compute_class_weight, _IS_32BIT -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import shuffle -from sklearn.linear_model import SGDClassifier -from sklearn.preprocessing import scale -from sklearn.utils._testing import skip_if_no_parallel - from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import SGDClassifier from sklearn.linear_model._logistic import ( - _log_reg_scoring_path, - _logistic_regression_path, LogisticRegression, LogisticRegressionCV, + _log_reg_scoring_path, + _logistic_regression_path, +) +from sklearn.metrics import get_scorer, log_loss +from sklearn.model_selection import ( + GridSearchCV, + StratifiedKFold, + cross_val_score, + train_test_split, ) +from sklearn.preprocessing import LabelEncoder, StandardScaler, scale +from sklearn.utils import _IS_32BIT, compute_class_weight, shuffle +from sklearn.utils._testing import ignore_warnings, skip_if_no_parallel X = [[-1, 0], [0, 1], [1, 1]] X_sp = sparse.csr_matrix(X) diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index 1a9a0a8b40c82..e1144dca5302e 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -1,25 +1,26 @@ # Author: Vlad Niculae # License: BSD 3 clause -import numpy as np -import pytest import warnings -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings - +import numpy as np +import pytest +from sklearn.datasets import make_sparse_coded_signal from sklearn.linear_model import ( - orthogonal_mp, - orthogonal_mp_gram, + LinearRegression, OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV, - LinearRegression, + orthogonal_mp, + orthogonal_mp_gram, ) from sklearn.utils import check_random_state -from sklearn.datasets import make_sparse_coded_signal +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3 y, X, gamma = make_sparse_coded_signal( diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index 
3ff92bd69a43b..c6555895ef6cc 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -1,17 +1,16 @@ import numpy as np -import scipy.sparse as sp - import pytest +import scipy.sparse as sp -from sklearn.base import is_classifier -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.base import ClassifierMixin -from sklearn.utils import check_random_state +from sklearn.base import ClassifierMixin, is_classifier from sklearn.datasets import load_iris -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import PassiveAggressiveRegressor +from sklearn.linear_model import PassiveAggressiveClassifier, PassiveAggressiveRegressor +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) iris = load_iris() random_state = check_random_state(12) diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index 4c4f092c69d71..e2c947a887bde 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -1,12 +1,11 @@ import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils import check_random_state from sklearn.datasets import load_iris from sklearn.linear_model import Perceptron +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_allclose, assert_array_almost_equal iris = load_iris() random_state = check_random_state(12) diff --git a/sklearn/linear_model/tests/test_quantile.py b/sklearn/linear_model/tests/test_quantile.py index 4c22c46aff463..a61f424a3cbbe 100644 --- a/sklearn/linear_model/tests/test_quantile.py +++ b/sklearn/linear_model/tests/test_quantile.py @@ -5,15 +5,14 @@ import numpy as np import pytest from pytest import approx -from scipy.optimize import minimize from scipy import sparse +from scipy.optimize import minimize from sklearn.datasets import make_regression from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import HuberRegressor, QuantileRegressor from sklearn.metrics import mean_pinball_loss -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import skip_if_32bit +from sklearn.utils._testing import assert_allclose, skip_if_32bit from sklearn.utils.fixes import parse_version, sp_version diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 53f6b2d1f75eb..62731cad5e767 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -1,18 +1,19 @@ import numpy as np import pytest +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy import sparse -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal - -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_allclose from sklearn.datasets import make_regression -from sklearn.linear_model import LinearRegression, RANSACRegressor, Ridge -from sklearn.linear_model import OrthogonalMatchingPursuit -from sklearn.linear_model._ransac import 
_dynamic_max_trials from sklearn.exceptions import ConvergenceWarning - +from sklearn.linear_model import ( + LinearRegression, + OrthogonalMatchingPursuit, + RANSACRegressor, + Ridge, +) +from sklearn.linear_model._ransac import _dynamic_max_trials +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_allclose # Generate coordinates of line X = np.arange(-200, 200) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 1f05d821efed4..eb7006c9ed91e 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1,53 +1,54 @@ -import numpy as np -import scipy.sparse as sp -from scipy import linalg +import warnings from itertools import product +import numpy as np import pytest -import warnings - -from sklearn.utils import _IS_32BIT -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.estimator_checks import check_sample_weights_invariance - -from sklearn.exceptions import ConvergenceWarning +import scipy.sparse as sp +from scipy import linalg from sklearn import datasets -from sklearn.metrics import mean_squared_error -from sklearn.metrics import make_scorer -from sklearn.metrics import get_scorer - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ridge_regression -from sklearn.linear_model import Ridge -from sklearn.linear_model._ridge import _RidgeGCV -from sklearn.linear_model import RidgeCV -from sklearn.linear_model import RidgeClassifier -from sklearn.linear_model import RidgeClassifierCV -from sklearn.linear_model._ridge import _solve_cholesky -from sklearn.linear_model._ridge import _solve_cholesky_kernel -from sklearn.linear_model._ridge import _solve_svd -from sklearn.linear_model._ridge import _solve_lbfgs -from sklearn.linear_model._ridge import _check_gcv_mode -from sklearn.linear_model._ridge import _X_CenterStackOp -from sklearn.datasets import make_low_rank_matrix -from sklearn.datasets import make_regression -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification - -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import KFold -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import cross_val_predict -from sklearn.model_selection import LeaveOneOut - +from sklearn.datasets import ( + make_classification, + make_low_rank_matrix, + make_multilabel_classification, + make_regression, +) +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import ( + LinearRegression, + Ridge, + RidgeClassifier, + RidgeClassifierCV, + RidgeCV, + ridge_regression, +) +from sklearn.linear_model._ridge import ( + _check_gcv_mode, + _RidgeGCV, + _solve_cholesky, + _solve_cholesky_kernel, + _solve_lbfgs, + _solve_svd, + _X_CenterStackOp, +) +from sklearn.metrics import get_scorer, make_scorer, mean_squared_error +from sklearn.model_selection import ( + GridSearchCV, + GroupKFold, + KFold, + LeaveOneOut, + cross_val_predict, +) from sklearn.preprocessing import minmax_scale -from sklearn.utils import check_random_state - +from sklearn.utils import _IS_32BIT, check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + 
assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.estimator_checks import check_sample_weights_invariance SOLVERS = ("svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga") SPARSE_SOLVERS_WITH_INTERCEPT = ("sparse_cg", "sag") diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index d3a27c4088ab7..d49da2b1e1e5b 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -5,27 +5,28 @@ import math import re -import pytest + import numpy as np +import pytest import scipy.sparse as sp from scipy.special import logsumexp from sklearn._loss.loss import HalfMultinomialLoss +from sklearn.base import clone +from sklearn.datasets import load_iris, make_blobs, make_classification +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.linear_model._base import make_dataset from sklearn.linear_model._linear_loss import LinearModelLoss from sklearn.linear_model._sag import get_auto_step_size from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples -from sklearn.linear_model import LogisticRegression, Ridge -from sklearn.linear_model._base import make_dataset - +from sklearn.preprocessing import LabelBinarizer, LabelEncoder +from sklearn.utils import check_random_state, compute_class_weight +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, +) from sklearn.utils.extmath import row_norms -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils import compute_class_weight -from sklearn.utils import check_random_state -from sklearn.preprocessing import LabelEncoder, LabelBinarizer -from sklearn.datasets import make_blobs, load_iris, make_classification -from sklearn.base import clone iris = load_iris() diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 1a48afeeb48db..0402b421e5c56 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -1,29 +1,32 @@ import pickle +from unittest.mock import Mock -import joblib -import pytest import numpy as np +import pytest import scipy.sparse as sp -from unittest.mock import Mock - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn import linear_model, datasets, metrics +import joblib +from sklearn import datasets, linear_model, metrics from sklearn.base import clone, is_classifier -from sklearn.svm import OneClassSVM -from sklearn.preprocessing import LabelEncoder, scale, MinMaxScaler -from sklearn.preprocessing import StandardScaler -from sklearn.kernel_approximation import Nystroem -from sklearn.pipeline import make_pipeline from sklearn.exceptions import ConvergenceWarning -from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit +from sklearn.kernel_approximation import Nystroem from sklearn.linear_model import _sgd_fast as sgd_fast from sklearn.linear_model import _stochastic_gradient -from sklearn.model_selection import RandomizedSearchCV +from sklearn.model_selection import ( + RandomizedSearchCV, + ShuffleSplit, + StratifiedShuffleSplit, +) +from sklearn.pipeline import 
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 1a48afeeb48db..0402b421e5c56 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -1,29 +1,32 @@
 import pickle
+from unittest.mock import Mock

-import joblib
-import pytest
 import numpy as np
+import pytest
 import scipy.sparse as sp
-from unittest.mock import Mock
-
-from sklearn.utils._testing import assert_allclose
-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import ignore_warnings
-from sklearn import linear_model, datasets, metrics
+import joblib
+from sklearn import datasets, linear_model, metrics
 from sklearn.base import clone, is_classifier
-from sklearn.svm import OneClassSVM
-from sklearn.preprocessing import LabelEncoder, scale, MinMaxScaler
-from sklearn.preprocessing import StandardScaler
-from sklearn.kernel_approximation import Nystroem
-from sklearn.pipeline import make_pipeline
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit
+from sklearn.kernel_approximation import Nystroem
 from sklearn.linear_model import _sgd_fast as sgd_fast
 from sklearn.linear_model import _stochastic_gradient
-from sklearn.model_selection import RandomizedSearchCV
+from sklearn.model_selection import (
+    RandomizedSearchCV,
+    ShuffleSplit,
+    StratifiedShuffleSplit,
+)
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, scale
+from sklearn.svm import OneClassSVM
+from sklearn.utils._testing import (
+    assert_allclose,
+    assert_almost_equal,
+    assert_array_almost_equal,
+    assert_array_equal,
+    ignore_warnings,
+)


 def _update_kwargs(kwargs):
diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
index b9d87e5207b7e..c1513ecc0c10b 100644
--- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
@@ -1,17 +1,16 @@
 import numpy as np
-from numpy.testing import assert_allclose
 import pytest
 import scipy.sparse as sp
+from numpy.testing import assert_allclose

 from sklearn.datasets import make_regression
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_almost_equal
-
-from sklearn.utils._testing import ignore_warnings
 from sklearn.exceptions import ConvergenceWarning
-
-from sklearn.linear_model import Lasso, ElasticNet, LassoCV, ElasticNetCV
-
+from sklearn.linear_model import ElasticNet, ElasticNetCV, Lasso, LassoCV
+from sklearn.utils._testing import (
+    assert_almost_equal,
+    assert_array_almost_equal,
+    ignore_warnings,
+)

 # FIXME: 'normalize' to be removed in 1.2
 filterwarnings_normalize = pytest.mark.filterwarnings(
diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py
index b067fa07224ed..6befe6e836796 100644
--- a/sklearn/linear_model/tests/test_theil_sen.py
+++ b/sklearn/linear_model/tests/test_theil_sen.py
@@ -8,16 +8,24 @@
 import re
 import sys
 from contextlib import contextmanager
+
 import numpy as np
 import pytest
-from numpy.testing import assert_array_equal, assert_array_less
-from numpy.testing import assert_array_almost_equal
+from numpy.testing import (
+    assert_array_almost_equal,
+    assert_array_equal,
+    assert_array_less,
+)
 from scipy.linalg import norm
 from scipy.optimize import fmin_bfgs
+
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LinearRegression, TheilSenRegressor
-from sklearn.linear_model._theil_sen import _spatial_median, _breakdown_point
-from sklearn.linear_model._theil_sen import _modified_weiszfeld_step
+from sklearn.linear_model._theil_sen import (
+    _breakdown_point,
+    _modified_weiszfeld_step,
+    _spatial_median,
+)
 from sklearn.utils._testing import assert_almost_equal
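In CI one usually wants a check that fails without rewriting files. A hedged sketch using isort's check-only API (isort.check_code exists; the snippet being checked is invented):

    # Hedged sketch: isort.check_code() returns False (and, with
    # show_diff=True, prints the would-be change) for unsorted input.
    import isort

    ok = isort.check_code(
        "import pytest\nimport numpy as np\n",
        profile="black",
        show_diff=True,
    )
    print(ok)  # False: numpy sorts before pytest within the section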
""" -from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding from ._isomap import Isomap +from ._locally_linear import LocallyLinearEmbedding, locally_linear_embedding from ._mds import MDS, smacof from ._spectral_embedding import SpectralEmbedding, spectral_embedding from ._t_sne import TSNE, trustworthiness diff --git a/sklearn/manifold/_barnes_hut_tsne.pyx b/sklearn/manifold/_barnes_hut_tsne.pyx index 2d314c0ccf3a5..6b1c1886bdb01 100644 --- a/sklearn/manifold/_barnes_hut_tsne.pyx +++ b/sklearn/manifold/_barnes_hut_tsne.pyx @@ -6,11 +6,12 @@ import numpy as np + cimport numpy as np +from cython.parallel cimport parallel, prange +from libc.math cimport log, sqrt from libc.stdio cimport printf -from libc.math cimport sqrt, log -from libc.stdlib cimport malloc, free -from cython.parallel cimport prange, parallel +from libc.stdlib cimport free, malloc from ..neighbors._quad_tree cimport _QuadTree diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py index aae9f09fd5a94..9d15f6162105d 100644 --- a/sklearn/manifold/_isomap.py +++ b/sklearn/manifold/_isomap.py @@ -5,18 +5,15 @@ import warnings import numpy as np - from scipy.sparse import issparse -from scipy.sparse.csgraph import shortest_path -from scipy.sparse.csgraph import connected_components +from scipy.sparse.csgraph import connected_components, shortest_path from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin -from ..neighbors import NearestNeighbors, kneighbors_graph -from ..neighbors import radius_neighbors_graph -from ..utils.validation import check_is_fitted from ..decomposition import KernelPCA +from ..neighbors import NearestNeighbors, kneighbors_graph, radius_neighbors_graph from ..preprocessing import KernelCenterer from ..utils.graph import _fix_connected_components +from ..utils.validation import check_is_fitted class Isomap(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index a9c6ec350b912..095d4f63201e7 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -5,22 +5,21 @@ # License: BSD 3 clause (C) INRIA 2011 import numpy as np -from scipy.linalg import eigh, svd, qr, solve -from scipy.sparse import eye, csr_matrix +from scipy.linalg import eigh, qr, solve, svd +from scipy.sparse import csr_matrix, eye from scipy.sparse.linalg import eigsh from ..base import ( BaseEstimator, TransformerMixin, - _UnstableArchMixin, _ClassNamePrefixFeaturesOutMixin, + _UnstableArchMixin, ) -from ..utils import check_random_state, check_array +from ..neighbors import NearestNeighbors +from ..utils import check_array, check_random_state from ..utils._arpack import _init_arpack_v0 from ..utils.extmath import stable_cumsum -from ..utils.validation import check_is_fitted -from ..utils.validation import FLOAT_DTYPES -from ..neighbors import NearestNeighbors +from ..utils.validation import FLOAT_DTYPES, check_is_fitted def barycenter_weights(X, Y, indices, reg=1e-3): diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py index 930f8d19b7b5e..157349a5ed08c 100644 --- a/sklearn/manifold/_mds.py +++ b/sklearn/manifold/_mds.py @@ -5,15 +5,16 @@ # author: Nelle Varoquaux # License: BSD +import warnings + import numpy as np -from joblib import Parallel, effective_n_jobs -import warnings +from joblib import Parallel, effective_n_jobs from ..base import BaseEstimator -from ..metrics import euclidean_distances -from ..utils import 
-from ..utils import check_random_state, check_array, check_symmetric
 from ..isotonic import IsotonicRegression
+from ..metrics import euclidean_distances
+from ..utils import check_array, check_random_state, check_symmetric
 from ..utils.fixes import delayed
diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py
index 54a72313491ee..fe437b4ae2d30 100644
--- a/sklearn/manifold/_spectral_embedding.py
+++ b/sklearn/manifold/_spectral_embedding.py
@@ -10,21 +10,17 @@
 import numpy as np
 from scipy import sparse
 from scipy.linalg import eigh
-from scipy.sparse.linalg import eigsh
 from scipy.sparse.csgraph import connected_components
 from scipy.sparse.csgraph import laplacian as csgraph_laplacian
+from scipy.sparse.linalg import eigsh

 from ..base import BaseEstimator
-from ..utils import (
-    check_array,
-    check_random_state,
-    check_symmetric,
-)
+from ..metrics.pairwise import rbf_kernel
+from ..neighbors import NearestNeighbors, kneighbors_graph
+from ..utils import check_array, check_random_state, check_symmetric
 from ..utils._arpack import _init_arpack_v0
 from ..utils.extmath import _deterministic_vector_sign_flip
 from ..utils.fixes import lobpcg
-from ..metrics.pairwise import rbf_kernel
-from ..neighbors import kneighbors_graph, NearestNeighbors


 def _graph_connected_component(graph, node_id):
diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py
index 5b7a3c4efd753..35fc0ee784a1a 100644
--- a/sklearn/manifold/_t_sne.py
+++ b/sklearn/manifold/_t_sne.py
@@ -10,25 +10,24 @@
 import warnings
 from time import time

 import numpy as np
 from scipy import linalg
-from scipy.spatial.distance import pdist
-from scipy.spatial.distance import squareform
 from scipy.sparse import csr_matrix, issparse
-from ..neighbors import NearestNeighbors
+from scipy.spatial.distance import pdist, squareform

 from ..base import BaseEstimator
+from ..decomposition import PCA
+from ..metrics.pairwise import pairwise_distances
+from ..neighbors import NearestNeighbors
 from ..utils import check_random_state
 from ..utils._openmp_helpers import _openmp_effective_n_threads
 from ..utils.validation import check_non_negative
-from ..decomposition import PCA
-from ..metrics.pairwise import pairwise_distances
-
-# mypy error: Module 'sklearn.manifold' has no attribute '_utils'
-from . import _utils  # type: ignore

 # mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
+# mypy error: Module 'sklearn.manifold' has no attribute '_utils'
 from . import _barnes_hut_tsne  # type: ignore
-
+from . import _utils  # type: ignore

 MACHINE_EPSILON = np.finfo(np.double).eps
diff --git a/sklearn/manifold/_utils.pyx b/sklearn/manifold/_utils.pyx
index 985aa3388d34c..efcafaab07270 100644
--- a/sklearn/manifold/_utils.pyx
+++ b/sklearn/manifold/_utils.pyx
@@ -1,6 +1,8 @@
-from libc cimport math
 cimport cython
+from libc cimport math
+
 import numpy as np
+
 cimport numpy as np
 from libc.stdio cimport printf
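The _t_sne.py hunk above also shows how isort treats comments: a comment sitting directly above an import travels with that import, which appears to be why the two mypy notes end up stacked together after sorting. A hedged sketch with invented module names:

    # Hedged sketch: leading comments move together with the import
    # directly below them when isort reorders.
    import isort

    code = (
        "# zoo is imported lazily elsewhere\n"
        "from zoo import zebra\n"
        "# farm provides the default fixture\n"
        "from farm import ant\n"
    )
    print(isort.code(code, profile="black"))
    # "farm" now sorts first and its comment comes along:
    #   # farm provides the default fixture
    #   from farm import ant
    #
    #   # zoo is imported lazily elsewhere
    #   from zoo import zebra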
diff --git a/sklearn/manifold/tests/test_isomap.py b/sklearn/manifold/tests/test_isomap.py
index 73365b08a5cfb..6e83f716b9731 100644
--- a/sklearn/manifold/tests/test_isomap.py
+++ b/sklearn/manifold/tests/test_isomap.py
@@ -1,24 +1,20 @@
+import math
 from itertools import product
+
 import numpy as np
-import math
+import pytest
 from numpy.testing import (
     assert_almost_equal,
     assert_array_almost_equal,
     assert_array_equal,
 )
-import pytest
+from scipy.sparse import rand as sparse_rand

-from sklearn import datasets
-from sklearn import manifold
-from sklearn import neighbors
-from sklearn import pipeline
-from sklearn import preprocessing
+from sklearn import datasets, manifold, neighbors, pipeline, preprocessing
 from sklearn.datasets import make_blobs
 from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.utils._testing import assert_allclose, assert_allclose_dense_sparse

-from scipy.sparse import rand as sparse_rand
-
 eigen_solvers = ["auto", "dense", "arpack"]
 path_methods = ["auto", "FW", "D"]
diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py
index 4272aa05b71a0..9dc1050c6bd29 100644
--- a/sklearn/manifold/tests/test_locally_linear.py
+++ b/sklearn/manifold/tests/test_locally_linear.py
@@ -1,17 +1,13 @@
 from itertools import product

 import numpy as np
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_array_equal,
-)
-from scipy import linalg
 import pytest
+from scipy import linalg

-from sklearn import neighbors, manifold
+from sklearn import manifold, neighbors
 from sklearn.datasets import make_blobs
 from sklearn.manifold._locally_linear import barycenter_kneighbors_graph
-from sklearn.utils._testing import ignore_warnings
+from sklearn.utils._testing import assert_allclose, assert_array_equal, ignore_warnings

 eigen_solvers = ["dense", "arpack"]

@@ -134,7 +130,7 @@ def test_pipeline():
     # check that LocallyLinearEmbedding works fine as a Pipeline
     # only checks that no error is raised.
     # TODO check that it actually does something useful
-    from sklearn import pipeline, datasets
+    from sklearn import datasets, pipeline

     X, y = datasets.make_blobs(random_state=0)
     clf = pipeline.Pipeline(
diff --git a/sklearn/manifold/tests/test_mds.py b/sklearn/manifold/tests/test_mds.py
index 242549f248f88..57a30f2022207 100644
--- a/sklearn/manifold/tests/test_mds.py
+++ b/sklearn/manifold/tests/test_mds.py
@@ -1,6 +1,6 @@
 import numpy as np
-from numpy.testing import assert_array_almost_equal
 import pytest
+from numpy.testing import assert_array_almost_equal

 from sklearn.manifold import _mds as mds
diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py
index 935e5408a4159..bb06b27f54489 100644
--- a/sklearn/manifold/tests/test_spectral_embedding.py
+++ b/sklearn/manifold/tests/test_spectral_embedding.py
@@ -1,23 +1,21 @@
-import pytest
-
 import numpy as np
-
+import pytest
 from scipy import sparse
-from scipy.sparse import csgraph
 from scipy.linalg import eigh
+from scipy.sparse import csgraph

-from sklearn.manifold import SpectralEmbedding
-from sklearn.manifold._spectral_embedding import _graph_is_connected
-from sklearn.manifold._spectral_embedding import _graph_connected_component
-from sklearn.manifold import spectral_embedding
-from sklearn.metrics.pairwise import rbf_kernel
-from sklearn.metrics import normalized_mutual_info_score
-from sklearn.neighbors import NearestNeighbors
 from sklearn.cluster import KMeans
 from sklearn.datasets import make_blobs
+from sklearn.manifold import SpectralEmbedding, spectral_embedding
+from sklearn.manifold._spectral_embedding import (
+    _graph_connected_component,
+    _graph_is_connected,
+)
+from sklearn.metrics import normalized_mutual_info_score
+from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.neighbors import NearestNeighbors
+from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal
 from sklearn.utils.extmath import _deterministic_vector_sign_flip
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_array_equal

 try:
     from pyamg import smoothed_aggregation_solver  # noqa
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index 861500e4a8891..bbd26743317b3 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -1,39 +1,43 @@
 import sys
+import warnings
 from io import StringIO
+
 import numpy as np
-from numpy.testing import assert_allclose
-import scipy.sparse as sp
 import pytest
-import warnings
+import scipy.sparse as sp
+from numpy.testing import assert_allclose
+from scipy.optimize import check_grad
+from scipy.spatial.distance import pdist, squareform

-from sklearn.neighbors import NearestNeighbors
-from sklearn.neighbors import kneighbors_graph
+from sklearn.datasets import make_blobs
 from sklearn.exceptions import EfficiencyWarning
-from sklearn.utils._testing import ignore_warnings
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import skip_if_32bit
-from sklearn.utils import check_random_state
-from sklearn.manifold._t_sne import _joint_probabilities
-from sklearn.manifold._t_sne import _joint_probabilities_nn
-from sklearn.manifold._t_sne import _kl_divergence
-from sklearn.manifold._t_sne import _kl_divergence_bh
-from sklearn.manifold._t_sne import _gradient_descent
-from sklearn.manifold._t_sne import trustworthiness
-from sklearn.manifold import TSNE

 # mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
 from sklearn.manifold import _barnes_hut_tsne  # type: ignore
+from sklearn.manifold import TSNE
+from sklearn.manifold._t_sne import (
+    _gradient_descent,
+    _joint_probabilities,
+    _joint_probabilities_nn,
+    _kl_divergence,
+    _kl_divergence_bh,
+    trustworthiness,
+)
 from sklearn.manifold._utils import _binary_search_perplexity
-from sklearn.datasets import make_blobs
-from scipy.optimize import check_grad
-from scipy.spatial.distance import pdist
-from scipy.spatial.distance import squareform
-from sklearn.metrics.pairwise import pairwise_distances
-from sklearn.metrics.pairwise import manhattan_distances
-from sklearn.metrics.pairwise import cosine_distances
-
+from sklearn.metrics.pairwise import (
+    cosine_distances,
+    manhattan_distances,
+    pairwise_distances,
+)
+from sklearn.neighbors import NearestNeighbors, kneighbors_graph
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import (
+    assert_almost_equal,
+    assert_array_almost_equal,
+    assert_array_equal,
+    ignore_warnings,
+    skip_if_32bit,
+)

 x = np.linspace(0, 1, 10)
 xx, yy = np.meshgrid(x, x)
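A repo-wide pass such as this one can also be reproduced from Python; a hedged sketch (isort.file rewrites one file in place and, per its docs, reports whether it changed anything — note this sketch would miss the .pyx sources the patch also touches):

    # Hedged sketch: apply isort to every .py file under sklearn/.
    from pathlib import Path

    import isort

    changed = [
        str(path)
        for path in Path("sklearn").rglob("*.py")
        if isort.file(path, profile="black")
    ]
    print(f"{len(changed)} files rewritten")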
diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 0c6f74a8b7f38..37d5c89fce485 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -4,100 +4,94 @@
 """

-from ._ranking import auc
-from ._ranking import average_precision_score
-from ._ranking import coverage_error
-from ._ranking import det_curve
-from ._ranking import dcg_score
-from ._ranking import label_ranking_average_precision_score
-from ._ranking import label_ranking_loss
-from ._ranking import ndcg_score
-from ._ranking import precision_recall_curve
-from ._ranking import roc_auc_score
-from ._ranking import roc_curve
-from ._ranking import top_k_accuracy_score
-
-from ._classification import accuracy_score
-from ._classification import balanced_accuracy_score
-from ._classification import classification_report
-from ._classification import cohen_kappa_score
-from ._classification import confusion_matrix
-from ._classification import f1_score
-from ._classification import fbeta_score
-from ._classification import hamming_loss
-from ._classification import hinge_loss
-from ._classification import jaccard_score
-from ._classification import log_loss
-from ._classification import matthews_corrcoef
-from ._classification import precision_recall_fscore_support
-from ._classification import precision_score
-from ._classification import recall_score
-from ._classification import zero_one_loss
-from ._classification import brier_score_loss
-from ._classification import multilabel_confusion_matrix
-
-from ._dist_metrics import DistanceMetric
-
 from . import cluster
-from .cluster import adjusted_mutual_info_score
-from .cluster import adjusted_rand_score
-from .cluster import rand_score
-from .cluster import pair_confusion_matrix
-from .cluster import completeness_score
-from .cluster import consensus_score
-from .cluster import homogeneity_completeness_v_measure
-from .cluster import homogeneity_score
-from .cluster import mutual_info_score
-from .cluster import normalized_mutual_info_score
-from .cluster import fowlkes_mallows_score
-from .cluster import silhouette_samples
-from .cluster import silhouette_score
-from .cluster import calinski_harabasz_score
-from .cluster import v_measure_score
-from .cluster import davies_bouldin_score
-
-from .pairwise import euclidean_distances
-from .pairwise import nan_euclidean_distances
-from .pairwise import pairwise_distances
-from .pairwise import pairwise_distances_argmin
-from .pairwise import pairwise_distances_argmin_min
-from .pairwise import pairwise_kernels
-from .pairwise import pairwise_distances_chunked
-
-from ._regression import explained_variance_score
-from ._regression import max_error
-from ._regression import mean_absolute_error
-from ._regression import mean_squared_error
-from ._regression import mean_squared_log_error
-from ._regression import median_absolute_error
-from ._regression import mean_absolute_percentage_error
-from ._regression import mean_pinball_loss
-from ._regression import r2_score
-from ._regression import mean_tweedie_deviance
-from ._regression import mean_poisson_deviance
-from ._regression import mean_gamma_deviance
-from ._regression import d2_tweedie_score
-from ._regression import d2_pinball_score
-from ._regression import d2_absolute_error_score
-
-
-from ._scorer import check_scoring
-from ._scorer import make_scorer
-from ._scorer import SCORERS
-from ._scorer import get_scorer
-from ._scorer import get_scorer_names
-
-
-from ._plot.det_curve import plot_det_curve
-from ._plot.det_curve import DetCurveDisplay
-from ._plot.roc_curve import plot_roc_curve
-from ._plot.roc_curve import RocCurveDisplay
-from ._plot.precision_recall_curve import plot_precision_recall_curve
-from ._plot.precision_recall_curve import PrecisionRecallDisplay
-
-from ._plot.confusion_matrix import plot_confusion_matrix
-from ._plot.confusion_matrix import ConfusionMatrixDisplay
-
+from ._classification import (
+    accuracy_score,
+    balanced_accuracy_score,
+    brier_score_loss,
+    classification_report,
+    cohen_kappa_score,
+    confusion_matrix,
+    f1_score,
+    fbeta_score,
+    hamming_loss,
+    hinge_loss,
+    jaccard_score,
+    log_loss,
+    matthews_corrcoef,
+    multilabel_confusion_matrix,
+    precision_recall_fscore_support,
+    precision_score,
+    recall_score,
+    zero_one_loss,
+)
+from ._dist_metrics import DistanceMetric
+from ._plot.confusion_matrix import ConfusionMatrixDisplay, plot_confusion_matrix
+from ._plot.det_curve import DetCurveDisplay, plot_det_curve
+from ._plot.precision_recall_curve import (
+    PrecisionRecallDisplay,
+    plot_precision_recall_curve,
+)
+from ._plot.roc_curve import RocCurveDisplay, plot_roc_curve
+from ._ranking import (
+    auc,
+    average_precision_score,
+    coverage_error,
+    dcg_score,
+    det_curve,
+    label_ranking_average_precision_score,
+    label_ranking_loss,
+    ndcg_score,
+    precision_recall_curve,
+    roc_auc_score,
+    roc_curve,
+    top_k_accuracy_score,
+)
+from ._regression import (
+    d2_absolute_error_score,
+    d2_pinball_score,
+    d2_tweedie_score,
+    explained_variance_score,
+    max_error,
+    mean_absolute_error,
+    mean_absolute_percentage_error,
+    mean_gamma_deviance,
+    mean_pinball_loss,
+    mean_poisson_deviance,
+    mean_squared_error,
+    mean_squared_log_error,
+    mean_tweedie_deviance,
+    median_absolute_error,
+    r2_score,
+)
+from ._scorer import SCORERS, check_scoring, get_scorer, get_scorer_names, make_scorer
+from .cluster import (
+    adjusted_mutual_info_score,
+    adjusted_rand_score,
+    calinski_harabasz_score,
+    completeness_score,
+    consensus_score,
+    davies_bouldin_score,
+    fowlkes_mallows_score,
+    homogeneity_completeness_v_measure,
+    homogeneity_score,
+    mutual_info_score,
+    normalized_mutual_info_score,
+    pair_confusion_matrix,
+    rand_score,
+    silhouette_samples,
+    silhouette_score,
+    v_measure_score,
+)
+from .pairwise import (
+    euclidean_distances,
+    nan_euclidean_distances,
+    pairwise_distances,
+    pairwise_distances_argmin,
+    pairwise_distances_argmin_min,
+    pairwise_distances_chunked,
+    pairwise_kernels,
+)

 __all__ = [
     "accuracy_score",
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index d759f6c4b3e76..fa3c4d81c6c9c 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -24,23 +24,21 @@

 import warnings

 import numpy as np
+from scipy.sparse import coo_matrix, csr_matrix

-from scipy.sparse import coo_matrix
-from scipy.sparse import csr_matrix
-
-from ..preprocessing import LabelBinarizer
-from ..preprocessing import LabelEncoder
-from ..utils import assert_all_finite
-from ..utils import check_array
-from ..utils import check_consistent_length
-from ..utils import column_or_1d
-from ..utils.multiclass import unique_labels
-from ..utils.multiclass import type_of_target
-from ..utils.validation import _num_samples
-from ..utils.sparsefuncs import count_nonzero
 from ..exceptions import UndefinedMetricWarning
-
+from ..preprocessing import LabelBinarizer, LabelEncoder
+from ..utils import (
+    assert_all_finite,
+    check_array,
+    check_consistent_length,
+    column_or_1d,
+)
+from ..utils.multiclass import type_of_target, unique_labels
+from ..utils.sparsefuncs import count_nonzero
+from ..utils.validation import _num_samples
 from ._base import _check_pos_label_consistency
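The parenthesized, trailing-comma import blocks produced everywhere above come from the black profile. Per the isort documentation, that profile is shorthand for roughly the explicit settings below; the long import line is an invented example used only to trigger wrapping:

    # Hedged sketch: an approximation of what profile="black" expands to.
    import isort

    black_like = dict(
        multi_line_output=3,  # vertical hanging indent
        include_trailing_comma=True,
        force_grid_wrap=0,
        use_parentheses=True,
        ensure_newline_before_comments=True,
        line_length=88,
    )
    long_import = (
        "from sklearn.utils.validation import check_array, "
        "check_consistent_length, check_scalar, column_or_1d\n"
    )
    print(isort.code(long_import, **black_like))
    # -> from sklearn.utils.validation import (
    #        check_array,
    #        check_consistent_length,
    #        check_scalar,
    #        column_or_1d,
    #    )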
diff --git a/sklearn/metrics/_dist_metrics.pxd b/sklearn/metrics/_dist_metrics.pxd
index 2c45bd6f40dff..d4aefec5cc9f3 100644
--- a/sklearn/metrics/_dist_metrics.pxd
+++ b/sklearn/metrics/_dist_metrics.pxd
@@ -1,8 +1,9 @@
 cimport numpy as cnp
-from libc.math cimport sqrt, exp
+from libc.math cimport exp, sqrt

 from ..utils._typedefs cimport DTYPE_t, ITYPE_t

+
 ######################################################################
 # Inline distance functions
 #
diff --git a/sklearn/metrics/_dist_metrics.pyx b/sklearn/metrics/_dist_metrics.pyx
index e3ad251a23e01..2c74dd5d75962 100644
--- a/sklearn/metrics/_dist_metrics.pyx
+++ b/sklearn/metrics/_dist_metrics.pyx
@@ -3,6 +3,7 @@
 # License: BSD

 import numpy as np
+
 cimport numpy as cnp
 from cython cimport final

@@ -25,14 +26,19 @@ cdef inline cnp.ndarray _buffer_to_ndarray(const DTYPE_t* x, cnp.npy_intp n):
     return PyArray_SimpleNewFromData(1, &n, DTYPECODE, <void*> x)

-from libc.math cimport fabs, sqrt, exp, pow, cos, sin, asin
+from libc.math cimport asin, cos, exp, fabs, pow, sin, sqrt
+
+
 cdef DTYPE_t INF = np.inf

 from scipy.sparse import csr_matrix, issparse

-from ..utils._typedefs cimport DTYPE_t, ITYPE_t, DTYPECODE
-from ..utils._typedefs import DTYPE, ITYPE
-from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
+from ..utils._typedefs cimport DTYPECODE, DTYPE_t, ITYPE_t

 from ..utils import check_array
+from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
+from ..utils._typedefs import DTYPE, ITYPE

+
 ######################################################################
 # newObj function
diff --git a/sklearn/metrics/_pairwise_distances_reduction.pyx b/sklearn/metrics/_pairwise_distances_reduction.pyx
index 9191efae2a8da..d1a6f1b8776f5 100644
--- a/sklearn/metrics/_pairwise_distances_reduction.pyx
+++ b/sklearn/metrics/_pairwise_distances_reduction.pyx
@@ -14,45 +14,49 @@
 # (using Cython prange loops) which gives another multiplicative speed-up in
 # favorable cases on many-core machines.
 cimport numpy as cnp
-import numpy as np
+
 import warnings

+import numpy as np
+
 from .. import get_config
-from libc.stdlib cimport free, malloc
-from libc.float cimport DBL_MAX
-from libcpp.memory cimport shared_ptr, make_shared
-from libcpp.vector cimport vector
+
+from cpython.ref cimport Py_INCREF
 from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
-from cpython.ref cimport Py_INCREF
+from libc.float cimport DBL_MAX
+from libc.stdlib cimport free, malloc
+from libcpp.memory cimport make_shared, shared_ptr
+from libcpp.vector cimport vector

-from ._dist_metrics cimport DatasetsPair, DenseDenseDatasetsPair
 from ..utils._cython_blas cimport (
-  BLAS_Order,
-  BLAS_Trans,
-  ColMajor,
-  NoTrans,
-  RowMajor,
-  Trans,
-  _dot,
-  _gemm,
+    BLAS_Order,
+    BLAS_Trans,
+    ColMajor,
+    NoTrans,
+    RowMajor,
+    Trans,
+    _dot,
+    _gemm,
 )
 from ..utils._heap cimport heap_push
-from ..utils._sorting cimport simultaneous_sort
 from ..utils._openmp_helpers cimport _openmp_thread_num
-from ..utils._typedefs cimport ITYPE_t, DTYPE_t
+from ..utils._sorting cimport simultaneous_sort
+from ..utils._typedefs cimport DTYPE_t, ITYPE_t
 from ..utils._vector_sentinel cimport vector_to_nd_array
+from ._dist_metrics cimport DatasetsPair, DenseDenseDatasetsPair

 from numbers import Integral, Real
 from typing import List

 from scipy.sparse import issparse

-from ._dist_metrics import BOOL_METRICS, METRIC_MAPPING
-from ..utils import check_scalar, _in_unstable_openblas_configuration
-from ..utils.fixes import threadpool_limits
-from ..utils._openmp_helpers import _openmp_effective_n_threads
-from ..utils._typedefs import ITYPE, DTYPE
+from ..utils import _in_unstable_openblas_configuration, check_scalar
+from ..utils._openmp_helpers import _openmp_effective_n_threads
+from ..utils._typedefs import DTYPE, ITYPE
+from ..utils.fixes import threadpool_limits
+from ._dist_metrics import BOOL_METRICS, METRIC_MAPPING

 cnp.import_array()
diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py
index 590a95970a0e4..c84cca834e494 100644
--- a/sklearn/metrics/_plot/confusion_matrix.py
+++ b/sklearn/metrics/_plot/confusion_matrix.py
@@ -2,11 +2,10 @@

 import numpy as np

-from .. import confusion_matrix
-from ...utils import check_matplotlib_support
-from ...utils import deprecated
-from ...utils.multiclass import unique_labels
 from ...base import is_classifier
+from ...utils import check_matplotlib_support, deprecated
+from ...utils.multiclass import unique_labels
+from .. import confusion_matrix


 class ConfusionMatrixDisplay:
diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py
index 92e84ce9b7974..a5a4e41b2622d 100644
--- a/sklearn/metrics/_plot/det_curve.py
+++ b/sklearn/metrics/_plot/det_curve.py
@@ -1,12 +1,9 @@
 import scipy as sp

-from .base import _get_response
-
-from .. import det_curve
+from ...utils import check_matplotlib_support, deprecated
 from .._base import _check_pos_label_consistency
-
-from ...utils import check_matplotlib_support
-from ...utils import deprecated
+from .._ranking import det_curve
+from .base import _get_response


 class DetCurveDisplay:
diff --git a/sklearn/metrics/_plot/precision_recall_curve.py b/sklearn/metrics/_plot/precision_recall_curve.py
index b3ccab0825703..cc9408d90a131 100644
--- a/sklearn/metrics/_plot/precision_recall_curve.py
+++ b/sklearn/metrics/_plot/precision_recall_curve.py
@@ -1,12 +1,10 @@
 from sklearn.base import is_classifier

-from .base import _get_response
-from .. import average_precision_score
-from .. import precision_recall_curve
+from ...utils import check_matplotlib_support, deprecated
 from .._base import _check_pos_label_consistency
 from .._classification import check_consistent_length
-
-from ...utils import check_matplotlib_support, deprecated
+from .._ranking import average_precision_score, precision_recall_curve
+from .base import _get_response


 class PrecisionRecallDisplay:
diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py
index a56cd3755b8d6..b2fb09225ed55 100644
--- a/sklearn/metrics/_plot/roc_curve.py
+++ b/sklearn/metrics/_plot/roc_curve.py
@@ -1,10 +1,7 @@
-from .base import _get_response
-
-from .. import auc
-from .. import roc_curve
-from .._base import _check_pos_label_consistency
-
 from ...utils import check_matplotlib_support, deprecated
+from .._base import _check_pos_label_consistency
+from .._ranking import auc, roc_curve
+from .base import _get_response


 class RocCurveDisplay:
diff --git a/sklearn/metrics/_plot/tests/test_base.py b/sklearn/metrics/_plot/tests/test_base.py
index 2f67d7dd223f4..fa860968e6044 100644
--- a/sklearn/metrics/_plot/tests/test_base.py
+++ b/sklearn/metrics/_plot/tests/test_base.py
@@ -3,9 +3,8 @@

 from sklearn.datasets import load_iris
 from sklearn.linear_model import LogisticRegression
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-
 from sklearn.metrics._plot.base import _get_response
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


 @pytest.mark.parametrize(
diff --git a/sklearn/metrics/_plot/tests/test_common_curve_display.py b/sklearn/metrics/_plot/tests/test_common_curve_display.py
index 5ed036b77f4d0..0a0a31f0c0c17 100644
--- a/sklearn/metrics/_plot/tests/test_common_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_common_curve_display.py
@@ -5,16 +5,11 @@
 from sklearn.datasets import load_iris
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import DetCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.tree import DecisionTreeClassifier

-from sklearn.metrics import (
-    DetCurveDisplay,
-    PrecisionRecallDisplay,
-    RocCurveDisplay,
-)
-

 @pytest.fixture(scope="module")
 def data():
diff --git a/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py b/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py
index e826888b65f89..e0911b47d13b2 100644
--- a/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py
+++ b/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py
@@ -1,22 +1,16 @@
-from numpy.testing import (
-    assert_allclose,
-    assert_array_equal,
-)
 import numpy as np
 import pytest
+from numpy.testing import assert_allclose, assert_array_equal
-from sklearn.datasets import make_classification
 from sklearn.compose import make_column_transformer
+from sklearn.datasets import make_classification
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC, SVR

-from sklearn.metrics import ConfusionMatrixDisplay
-from sklearn.metrics import confusion_matrix
-
-
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
diff --git a/sklearn/metrics/_plot/tests/test_det_curve_display.py b/sklearn/metrics/_plot/tests/test_det_curve_display.py
index 5d7a26d5e49a0..403ea70109577 100644
--- a/sklearn/metrics/_plot/tests/test_det_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_det_curve_display.py
@@ -1,12 +1,10 @@
-import pytest
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose

 from sklearn.datasets import load_iris
 from sklearn.linear_model import LogisticRegression
-
-from sklearn.metrics import det_curve
-from sklearn.metrics import DetCurveDisplay
+from sklearn.metrics import DetCurveDisplay, det_curve


 @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])
diff --git a/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py b/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py
index 4a4c4a96a5b32..cc99298dbfc55 100644
--- a/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py
+++ b/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py
@@ -1,22 +1,21 @@
 # TODO: remove this file when plot_confusion_matrix will be deprecated in 1.2

-import pytest
 import numpy as np
-from numpy.testing import assert_allclose
-from numpy.testing import assert_array_equal
+import pytest
+from numpy.testing import assert_allclose, assert_array_equal

 from sklearn.compose import make_column_transformer
 from sklearn.datasets import make_classification
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import (
+    ConfusionMatrixDisplay,
+    confusion_matrix,
+    plot_confusion_matrix,
+)
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC, SVR

-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import plot_confusion_matrix
-from sklearn.metrics import ConfusionMatrixDisplay
-
-
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
diff --git a/sklearn/metrics/_plot/tests/test_plot_curve_common.py b/sklearn/metrics/_plot/tests/test_plot_curve_common.py
index d430acd42596c..728e0a2694a06 100644
--- a/sklearn/metrics/_plot/tests/test_plot_curve_common.py
+++ b/sklearn/metrics/_plot/tests/test_plot_curve_common.py
@@ -1,18 +1,15 @@
 import pytest

-from sklearn.base import ClassifierMixin
-from sklearn.base import clone
+from sklearn.base import ClassifierMixin, clone
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_iris
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import plot_det_curve, plot_roc_curve
 from sklearn.pipeline import make_pipeline
sklearn.preprocessing import StandardScaler from sklearn.tree import DecisionTreeClassifier -from sklearn.metrics import plot_det_curve -from sklearn.metrics import plot_roc_curve - pytestmark = pytest.mark.filterwarnings( "ignore:Function plot_roc_curve is deprecated", ) diff --git a/sklearn/metrics/_plot/tests/test_plot_det_curve.py b/sklearn/metrics/_plot/tests/test_plot_det_curve.py index 31f840a6a1ff5..ad10dc94c08cb 100644 --- a/sklearn/metrics/_plot/tests/test_plot_det_curve.py +++ b/sklearn/metrics/_plot/tests/test_plot_det_curve.py @@ -1,13 +1,11 @@ # TODO: remove this file when plot_det_curve will be deprecated in 1.2 -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression - -from sklearn.metrics import det_curve -from sklearn.metrics import plot_det_curve +from sklearn.metrics import det_curve, plot_det_curve @pytest.fixture(scope="module") diff --git a/sklearn/metrics/_plot/tests/test_plot_precision_recall.py b/sklearn/metrics/_plot/tests/test_plot_precision_recall.py index 1d687b0c31abc..a3fc3ce61253c 100644 --- a/sklearn/metrics/_plot/tests/test_plot_precision_recall.py +++ b/sklearn/metrics/_plot/tests/test_plot_precision_recall.py @@ -1,21 +1,22 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.metrics import plot_precision_recall_curve -from sklearn.metrics import average_precision_score -from sklearn.metrics import precision_recall_curve -from sklearn.datasets import make_classification -from sklearn.datasets import load_breast_cancer -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.compose import make_column_transformer +from sklearn.datasets import load_breast_cancer, make_classification +from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression +from sklearn.metrics import ( + average_precision_score, + plot_precision_recall_curve, + precision_recall_curve, +) from sklearn.model_selection import train_test_split -from sklearn.exceptions import NotFittedError from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import shuffle -from sklearn.compose import make_column_transformer pytestmark = pytest.mark.filterwarnings( # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved diff --git a/sklearn/metrics/_plot/tests/test_plot_roc_curve.py b/sklearn/metrics/_plot/tests/test_plot_roc_curve.py index 587d1c2d272d9..60946f6a12baa 100644 --- a/sklearn/metrics/_plot/tests/test_plot_roc_curve.py +++ b/sklearn/metrics/_plot/tests/test_plot_roc_curve.py @@ -1,19 +1,16 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose -from sklearn.metrics import plot_roc_curve -from sklearn.metrics import roc_curve -from sklearn.metrics import auc -from sklearn.datasets import load_iris -from sklearn.datasets import load_breast_cancer +from sklearn.compose import make_column_transformer +from sklearn.datasets import load_breast_cancer, load_iris +from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression +from sklearn.metrics import auc, plot_roc_curve, roc_curve from sklearn.model_selection import train_test_split -from sklearn.exceptions import NotFittedError 
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
-from sklearn.compose import make_column_transformer

 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
index 49b508942ab56..b516486e59a9a 100644
--- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py
+++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
@@ -5,15 +5,18 @@
 from sklearn.datasets import load_breast_cancer, make_classification
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import average_precision_score, precision_recall_curve
+from sklearn.metrics import (
+    PrecisionRecallDisplay,
+    average_precision_score,
+    plot_precision_recall_curve,
+    precision_recall_curve,
+)
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC, SVR
 from sklearn.utils import shuffle

-from sklearn.metrics import PrecisionRecallDisplay, plot_precision_recall_curve
-
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
index e8465b53747ec..13eec20cb5b7b 100644
--- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -1,26 +1,18 @@
-import pytest
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
-
 from sklearn.compose import make_column_transformer
-from sklearn.datasets import load_iris
-
-from sklearn.datasets import load_breast_cancer, make_classification
+from sklearn.datasets import load_breast_cancer, load_iris, make_classification
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import roc_curve
-from sklearn.metrics import auc
-
+from sklearn.metrics import RocCurveDisplay, auc, plot_roc_curve, roc_curve
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle

-from sklearn.metrics import RocCurveDisplay, plot_roc_curve
-
-
 @pytest.fixture(scope="module")
 def data():
     return load_iris(return_X_y=True)
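The test files above are dominated by one transformation: many single-name from-imports of sklearn.metrics collapsing into one statement. isort merges repeated from-imports of the same module by default; a hedged sketch:

    # Hedged sketch: duplicate from-imports are combined and sorted.
    import isort

    print(isort.code(
        "from sklearn.metrics import roc_curve\n"
        "from sklearn.metrics import auc\n"
        "from sklearn.metrics import plot_roc_curve\n",
        profile="black",
    ))
    # -> from sklearn.metrics import auc, plot_roc_curve, roc_curve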
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 4e88bd5edc888..3cf49851aa502 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -26,17 +26,19 @@
 from scipy.sparse import csr_matrix
 from scipy.stats import rankdata

-from ..utils import assert_all_finite
-from ..utils import check_consistent_length
-from ..utils.validation import _check_sample_weight
-from ..utils import column_or_1d, check_array
-from ..utils.multiclass import type_of_target
-from ..utils.extmath import stable_cumsum
-from ..utils.sparsefuncs import count_nonzero
 from ..exceptions import UndefinedMetricWarning
 from ..preprocessing import label_binarize
+from ..utils import (
+    assert_all_finite,
+    check_array,
+    check_consistent_length,
+    column_or_1d,
+)
 from ..utils._encode import _encode, _unique
-
+from ..utils.extmath import stable_cumsum
+from ..utils.multiclass import type_of_target
+from ..utils.sparsefuncs import count_nonzero
+from ..utils.validation import _check_sample_weight
 from ._base import (
     _average_binary_score,
     _average_multiclass_ovo_score,
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 57986692fb896..f8d4c941b8fb9 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -33,16 +33,15 @@
 from scipy.special import xlogy

 from ..exceptions import UndefinedMetricWarning
+from ..utils.stats import _weighted_percentile
 from ..utils.validation import (
+    _check_sample_weight,
+    _num_samples,
     check_array,
     check_consistent_length,
     check_scalar,
-    _num_samples,
     column_or_1d,
-    _check_sample_weight,
 )
-from ..utils.stats import _weighted_percentile
-

 __ALL__ = [
     "max_error",
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index e1655af169fcc..652e314ac95cb 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -18,51 +18,51 @@
 # Arnaud Joly
 # License: Simplified BSD

+import copy
+import warnings
+from collections import Counter
 from collections.abc import Iterable
 from functools import partial
-from collections import Counter

 import numpy as np
-import copy
-import warnings

+from ..base import is_regressor
+from ..utils.multiclass import type_of_target
 from . import (
-    r2_score,
-    median_absolute_error,
-    max_error,
-    mean_absolute_error,
-    mean_squared_error,
-    mean_squared_log_error,
-    mean_poisson_deviance,
-    mean_gamma_deviance,
     accuracy_score,
-    top_k_accuracy_score,
-    f1_score,
-    roc_auc_score,
     average_precision_score,
-    precision_score,
-    recall_score,
-    log_loss,
     balanced_accuracy_score,
-    explained_variance_score,
     brier_score_loss,
+    explained_variance_score,
+    f1_score,
     jaccard_score,
-    mean_absolute_percentage_error,
+    log_loss,
     matthews_corrcoef,
+    max_error,
+    mean_absolute_error,
+    mean_absolute_percentage_error,
+    mean_gamma_deviance,
+    mean_poisson_deviance,
+    mean_squared_error,
+    mean_squared_log_error,
+    median_absolute_error,
+    precision_score,
+    r2_score,
+    recall_score,
+    roc_auc_score,
+    top_k_accuracy_score,
+)
+from .cluster import (
+    adjusted_mutual_info_score,
+    adjusted_rand_score,
+    completeness_score,
+    fowlkes_mallows_score,
+    homogeneity_score,
+    mutual_info_score,
+    normalized_mutual_info_score,
+    rand_score,
+    v_measure_score,
 )
-
-from .cluster import adjusted_rand_score
-from .cluster import rand_score
-from .cluster import homogeneity_score
-from .cluster import completeness_score
-from .cluster import v_measure_score
-from .cluster import mutual_info_score
-from .cluster import adjusted_mutual_info_score
-from .cluster import normalized_mutual_info_score
-from .cluster import fowlkes_mallows_score
-
-from ..utils.multiclass import type_of_target
-from ..base import is_regressor


 def _cached_call(cache, estimator, method, *args, **kwargs):
diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py
index fefb47b11903a..a332997a84414 100644
--- a/sklearn/metrics/cluster/__init__.py
+++ b/sklearn/metrics/cluster/__init__.py
@@ -5,25 +5,29 @@
 - supervised, which uses a ground truth class values for each sample.
 - unsupervised, which does not and measures the 'quality' of the model itself.
""" -from ._supervised import adjusted_mutual_info_score -from ._supervised import normalized_mutual_info_score -from ._supervised import adjusted_rand_score -from ._supervised import rand_score -from ._supervised import completeness_score -from ._supervised import contingency_matrix -from ._supervised import pair_confusion_matrix -from ._supervised import expected_mutual_information -from ._supervised import homogeneity_completeness_v_measure -from ._supervised import homogeneity_score -from ._supervised import mutual_info_score -from ._supervised import v_measure_score -from ._supervised import fowlkes_mallows_score -from ._supervised import entropy -from ._unsupervised import silhouette_samples -from ._unsupervised import silhouette_score -from ._unsupervised import calinski_harabasz_score -from ._unsupervised import davies_bouldin_score from ._bicluster import consensus_score +from ._supervised import ( + adjusted_mutual_info_score, + adjusted_rand_score, + completeness_score, + contingency_matrix, + entropy, + expected_mutual_information, + fowlkes_mallows_score, + homogeneity_completeness_v_measure, + homogeneity_score, + mutual_info_score, + normalized_mutual_info_score, + pair_confusion_matrix, + rand_score, + v_measure_score, +) +from ._unsupervised import ( + calinski_harabasz_score, + davies_bouldin_score, + silhouette_samples, + silhouette_score, +) __all__ = [ "adjusted_mutual_info_score", diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py index 3ed314dfa96d2..cd06b63bafdbf 100644 --- a/sklearn/metrics/cluster/_bicluster.py +++ b/sklearn/metrics/cluster/_bicluster.py @@ -1,7 +1,7 @@ import numpy as np from scipy.optimize import linear_sum_assignment -from ...utils.validation import check_consistent_length, check_array +from ...utils.validation import check_array, check_consistent_length __all__ = ["consensus_score"] diff --git a/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx b/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx index 1f9c0dc78bf95..50b5066d82e29 100644 --- a/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx +++ b/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx @@ -3,10 +3,12 @@ # License: BSD 3 clause from libc.math cimport exp, lgamma -from scipy.special import gammaln + import numpy as np -cimport numpy as cnp +from scipy.special import gammaln + cimport cython +cimport numpy as cnp cnp.import_array() ctypedef cnp.float64_t DOUBLE diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index a6a66884b70b2..b2fc15cb14538 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -22,9 +22,9 @@ import numpy as np from scipy import sparse as sp -from ._expected_mutual_info_fast import expected_mutual_information from ...utils.multiclass import type_of_target from ...utils.validation import check_array, check_consistent_length +from ._expected_mutual_info_fast import expected_mutual_information def check_clusterings(labels_true, labels_pred): diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index 3c25330cde707..fb234f2cbfb6f 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -10,12 +10,9 @@ import numpy as np -from ...utils import check_random_state -from ...utils import check_X_y -from ...utils import _safe_indexing -from ..pairwise import pairwise_distances_chunked -from ..pairwise import pairwise_distances from 
 from ...preprocessing import LabelEncoder
+from ...utils import _safe_indexing, check_random_state, check_X_y
+from ..pairwise import pairwise_distances, pairwise_distances_chunked


 def check_number_of_labels(n_labels, n_samples):
diff --git a/sklearn/metrics/cluster/tests/test_bicluster.py b/sklearn/metrics/cluster/tests/test_bicluster.py
index 2cbcb6e6826c7..53f7805100a13 100644
--- a/sklearn/metrics/cluster/tests/test_bicluster.py
+++ b/sklearn/metrics/cluster/tests/test_bicluster.py
@@ -2,10 +2,9 @@

 import numpy as np

-from sklearn.utils._testing import assert_almost_equal
-
-from sklearn.metrics.cluster._bicluster import _jaccard
 from sklearn.metrics import consensus_score
+from sklearn.metrics.cluster._bicluster import _jaccard
+from sklearn.utils._testing import assert_almost_equal


 def test_jaccard():
diff --git a/sklearn/metrics/cluster/tests/test_common.py b/sklearn/metrics/cluster/tests/test_common.py
index a4e8c4530dbe6..bc32b7df7f561 100644
--- a/sklearn/metrics/cluster/tests/test_common.py
+++ b/sklearn/metrics/cluster/tests/test_common.py
@@ -1,25 +1,25 @@
 from functools import partial
 from itertools import chain

-import pytest
 import numpy as np
+import pytest

-from sklearn.metrics.cluster import adjusted_mutual_info_score
-from sklearn.metrics.cluster import adjusted_rand_score
-from sklearn.metrics.cluster import rand_score
-from sklearn.metrics.cluster import completeness_score
-from sklearn.metrics.cluster import fowlkes_mallows_score
-from sklearn.metrics.cluster import homogeneity_score
-from sklearn.metrics.cluster import mutual_info_score
-from sklearn.metrics.cluster import normalized_mutual_info_score
-from sklearn.metrics.cluster import v_measure_score
-from sklearn.metrics.cluster import silhouette_score
-from sklearn.metrics.cluster import calinski_harabasz_score
-from sklearn.metrics.cluster import davies_bouldin_score
-
+from sklearn.metrics.cluster import (
+    adjusted_mutual_info_score,
+    adjusted_rand_score,
+    calinski_harabasz_score,
+    completeness_score,
+    davies_bouldin_score,
+    fowlkes_mallows_score,
+    homogeneity_score,
+    mutual_info_score,
+    normalized_mutual_info_score,
+    rand_score,
+    silhouette_score,
+    v_measure_score,
+)
 from sklearn.utils._testing import assert_allclose

-
 # Dictionaries of metrics
 # ------------------------
 # The goal of having those dictionaries is to have an easy way to call a
diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py
index 4356a0a05286c..dfaa58ff62c01 100644
--- a/sklearn/metrics/cluster/tests/test_supervised.py
+++ b/sklearn/metrics/cluster/tests/test_supervised.py
@@ -2,28 +2,27 @@

 import numpy as np
 import pytest
+from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal

-from sklearn.metrics.cluster import adjusted_mutual_info_score
-from sklearn.metrics.cluster import adjusted_rand_score
-from sklearn.metrics.cluster import rand_score
-from sklearn.metrics.cluster import completeness_score
-from sklearn.metrics.cluster import contingency_matrix
-from sklearn.metrics.cluster import pair_confusion_matrix
-from sklearn.metrics.cluster import entropy
-from sklearn.metrics.cluster import expected_mutual_information
-from sklearn.metrics.cluster import fowlkes_mallows_score
-from sklearn.metrics.cluster import homogeneity_completeness_v_measure
-from sklearn.metrics.cluster import homogeneity_score
-from sklearn.metrics.cluster import mutual_info_score
-from sklearn.metrics.cluster import normalized_mutual_info_score
-from sklearn.metrics.cluster import v_measure_score
-from sklearn.metrics.cluster._supervised import _generalized_average
-from sklearn.metrics.cluster._supervised import check_clusterings
-
+from sklearn.metrics.cluster import (
+    adjusted_mutual_info_score,
+    adjusted_rand_score,
+    completeness_score,
+    contingency_matrix,
+    entropy,
+    expected_mutual_information,
+    fowlkes_mallows_score,
+    homogeneity_completeness_v_measure,
+    homogeneity_score,
+    mutual_info_score,
+    normalized_mutual_info_score,
+    pair_confusion_matrix,
+    rand_score,
+    v_measure_score,
+)
+from sklearn.metrics.cluster._supervised import _generalized_average, check_clusterings
 from sklearn.utils import assert_all_finite
 from sklearn.utils._testing import assert_almost_equal
-from numpy.testing import assert_array_equal, assert_array_almost_equal, assert_allclose
-

 score_funcs = [
     adjusted_rand_score,
diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py
index 22dd1a1bf1557..9fde388fcda5a 100644
--- a/sklearn/metrics/cluster/tests/test_unsupervised.py
+++ b/sklearn/metrics/cluster/tests/test_unsupervised.py
@@ -1,17 +1,19 @@
 import warnings

 import numpy as np
-import scipy.sparse as sp
 import pytest
+import scipy.sparse as sp
 from scipy.sparse import csr_matrix

 from sklearn import datasets
-from sklearn.utils._testing import assert_array_equal
-from sklearn.metrics.cluster import silhouette_score
-from sklearn.metrics.cluster import silhouette_samples
 from sklearn.metrics import pairwise_distances
-from sklearn.metrics.cluster import calinski_harabasz_score
-from sklearn.metrics.cluster import davies_bouldin_score
+from sklearn.metrics.cluster import (
+    calinski_harabasz_score,
+    davies_bouldin_score,
+    silhouette_samples,
+    silhouette_score,
+)
+from sklearn.utils._testing import assert_array_equal


 def test_silhouette():
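Not every import can be reordered safely — some have import-time side effects. isort honors an inline opt-out, which none of the hunks here needed but which matters for a repo-wide pass; a hedged sketch with an invented module name:

    # Hedged sketch: a line tagged "# isort: skip" is left where it is.
    import isort

    print(isort.code(
        "import zzz_must_come_first  # isort: skip\n"
        "import numpy as np\n"
        "import argparse\n",
        profile="black",
    ))
    # argparse and numpy are regrouped while the pinned line stays put.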
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 33b2a9901902b..84446705d827b 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -8,31 +8,31 @@
 # License: BSD 3 clause

 import itertools
-from functools import partial
 import warnings
+from functools import partial

 import numpy as np
+from scipy.sparse import csr_matrix, issparse
 from scipy.spatial import distance
-from scipy.sparse import csr_matrix
-from scipy.sparse import issparse

 from joblib import Parallel, effective_n_jobs

 from .. import config_context
-from ..utils.validation import _num_samples
-from ..utils.validation import check_non_negative
-from ..utils import check_array
-from ..utils import gen_even_slices
-from ..utils import gen_batches, get_chunk_n_rows
-from ..utils import is_scalar_nan
-from ..utils.extmath import row_norms, safe_sparse_dot
+from ..exceptions import DataConversionWarning
 from ..preprocessing import normalize
+from ..utils import (
+    check_array,
+    gen_batches,
+    gen_even_slices,
+    get_chunk_n_rows,
+    is_scalar_nan,
+)
 from ..utils._mask import _get_mask
-from ..utils.fixes import delayed
-from ..utils.fixes import sp_version, parse_version
-
+from ..utils.extmath import row_norms, safe_sparse_dot
+from ..utils.fixes import delayed, parse_version, sp_version
+from ..utils.validation import _num_samples, check_non_negative
 from ._pairwise_distances_reduction import PairwiseDistancesArgKmin
 from ._pairwise_fast import _chi2_kernel_fast, _sparse_manhattan
-from ..exceptions import DataConversionWarning


 # Utility Functions
diff --git a/sklearn/metrics/setup.py b/sklearn/metrics/setup.py
index 736ba6d7d4424..d2d40afd2d889 100644
--- a/sklearn/metrics/setup.py
+++ b/sklearn/metrics/setup.py
@@ -1,6 +1,6 @@
 import os
-import numpy as np

+import numpy as np
 from numpy.distutils.misc_util import Configuration
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 25c2dcda55d9c..6a977dc1e3b35 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -1,52 +1,49 @@
-from functools import partial
-from itertools import product
-from itertools import chain
-from itertools import permutations
-import warnings
 import re
+import warnings
+from functools import partial
+from itertools import chain, permutations, product

 import numpy as np
-from scipy import linalg
 import pytest
+from scipy import linalg
+from scipy.spatial.distance import hamming as sp_hamming

-from sklearn import datasets
-from sklearn import svm
-
+from sklearn import datasets, svm
 from sklearn.datasets import make_multilabel_classification
-from sklearn.preprocessing import label_binarize, LabelBinarizer
-from sklearn.utils.validation import check_random_state
-from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_equal
-from sklearn.utils._testing import assert_array_almost_equal
-from sklearn.utils._testing import assert_allclose
-from sklearn.utils._testing import assert_no_warnings
-from sklearn.utils._testing import ignore_warnings
-from sklearn.utils._mocking import MockDataFrame
-
-from sklearn.metrics import accuracy_score
-from sklearn.metrics import average_precision_score
-from sklearn.metrics import balanced_accuracy_score
-from sklearn.metrics import classification_report
-from sklearn.metrics import cohen_kappa_score
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import f1_score
-from sklearn.metrics import fbeta_score
-from sklearn.metrics import hamming_loss
-from sklearn.metrics import hinge_loss
-from sklearn.metrics import jaccard_score
-from sklearn.metrics import log_loss
-from sklearn.metrics import matthews_corrcoef
-from sklearn.metrics import precision_recall_fscore_support
-from sklearn.metrics import precision_score
-from sklearn.metrics import recall_score
-from sklearn.metrics import zero_one_loss
-from sklearn.metrics import brier_score_loss
-from sklearn.metrics import multilabel_confusion_matrix
-
sklearn.metrics._classification import _check_targets from sklearn.exceptions import UndefinedMetricWarning - -from scipy.spatial.distance import hamming as sp_hamming +from sklearn.metrics import ( + accuracy_score, + average_precision_score, + balanced_accuracy_score, + brier_score_loss, + classification_report, + cohen_kappa_score, + confusion_matrix, + f1_score, + fbeta_score, + hamming_loss, + hinge_loss, + jaccard_score, + log_loss, + matthews_corrcoef, + multilabel_confusion_matrix, + precision_recall_fscore_support, + precision_score, + recall_score, + zero_one_loss, +) +from sklearn.metrics._classification import _check_targets +from sklearn.preprocessing import LabelBinarizer, label_binarize +from sklearn.utils._mocking import MockDataFrame +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + assert_no_warnings, + ignore_warnings, +) +from sklearn.utils.validation import check_random_state ############################################################################### # Utilities for testing @@ -2438,7 +2435,7 @@ def test_log_loss_pandas_input(): y_pr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]]) types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 1e627f9f86676..8bff90e00a1a9 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1,71 +1,67 @@ from functools import partial from inspect import signature -from itertools import product -from itertools import chain -from itertools import permutations +from itertools import chain, permutations, product import numpy as np -import scipy.sparse as sp - import pytest +import scipy.sparse as sp from sklearn.datasets import make_multilabel_classification +from sklearn.metrics import ( + accuracy_score, + average_precision_score, + balanced_accuracy_score, + brier_score_loss, + cohen_kappa_score, + confusion_matrix, + coverage_error, + d2_absolute_error_score, + d2_pinball_score, + d2_tweedie_score, + dcg_score, + det_curve, + explained_variance_score, + f1_score, + fbeta_score, + hamming_loss, + hinge_loss, + jaccard_score, + label_ranking_average_precision_score, + label_ranking_loss, + log_loss, + matthews_corrcoef, + max_error, + mean_absolute_error, + mean_absolute_percentage_error, + mean_gamma_deviance, + mean_pinball_loss, + mean_poisson_deviance, + mean_squared_error, + mean_tweedie_deviance, + median_absolute_error, + multilabel_confusion_matrix, + ndcg_score, + precision_recall_curve, + precision_score, + r2_score, + recall_score, + roc_auc_score, + roc_curve, + top_k_accuracy_score, + zero_one_loss, +) +from sklearn.metrics._base import _average_binary_score from sklearn.preprocessing import LabelBinarizer -from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import _num_samples -from sklearn.utils.validation import check_random_state from sklearn.utils import shuffle - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils._testing import ignore_warnings - -from sklearn.metrics import accuracy_score -from sklearn.metrics import average_precision_score -from 
sklearn.metrics import balanced_accuracy_score -from sklearn.metrics import brier_score_loss -from sklearn.metrics import cohen_kappa_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import coverage_error -from sklearn.metrics import d2_tweedie_score -from sklearn.metrics import d2_pinball_score -from sklearn.metrics import d2_absolute_error_score -from sklearn.metrics import det_curve -from sklearn.metrics import explained_variance_score -from sklearn.metrics import f1_score -from sklearn.metrics import fbeta_score -from sklearn.metrics import hamming_loss -from sklearn.metrics import hinge_loss -from sklearn.metrics import jaccard_score -from sklearn.metrics import label_ranking_average_precision_score -from sklearn.metrics import label_ranking_loss -from sklearn.metrics import log_loss -from sklearn.metrics import max_error -from sklearn.metrics import matthews_corrcoef -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_absolute_percentage_error -from sklearn.metrics import mean_squared_error -from sklearn.metrics import mean_tweedie_deviance -from sklearn.metrics import mean_poisson_deviance -from sklearn.metrics import mean_gamma_deviance -from sklearn.metrics import median_absolute_error -from sklearn.metrics import multilabel_confusion_matrix -from sklearn.metrics import mean_pinball_loss -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import precision_score -from sklearn.metrics import r2_score -from sklearn.metrics import recall_score -from sklearn.metrics import roc_auc_score -from sklearn.metrics import roc_curve -from sklearn.metrics import zero_one_loss -from sklearn.metrics import ndcg_score -from sklearn.metrics import dcg_score -from sklearn.metrics import top_k_accuracy_score - -from sklearn.metrics._base import _average_binary_score - +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_equal, + assert_array_less, + ignore_warnings, +) +from sklearn.utils.multiclass import type_of_target +from sklearn.utils.validation import _num_samples, check_random_state # Note toward developers about metric testing # ------------------------------------------- diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py index 6c841d1d44f8c..e74c669d47d6a 100644 --- a/sklearn/metrics/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -1,19 +1,18 @@ +import copy import itertools import pickle -import copy import numpy as np -from numpy.testing import assert_array_almost_equal - import pytest - import scipy.sparse as sp +from numpy.testing import assert_array_almost_equal from scipy.spatial.distance import cdist + from sklearn.metrics import DistanceMetric from sklearn.metrics._dist_metrics import BOOL_METRICS from sklearn.utils import check_random_state from sklearn.utils._testing import create_memmap_backed_data -from sklearn.utils.fixes import sp_version, parse_version +from sklearn.utils.fixes import parse_version, sp_version def dist_func(x1, x2, p): diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index f14c558d5a3c1..a331c2f05580d 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -3,10 +3,15 @@ import numpy as np from numpy import linalg - -from scipy.sparse import dok_matrix, csr_matrix, issparse -from scipy.spatial.distance import cosine, cityblock, minkowski -from scipy.spatial.distance import 
cdist, pdist, squareform +from scipy.sparse import csr_matrix, dok_matrix, issparse +from scipy.spatial.distance import ( + cdist, + cityblock, + cosine, + minkowski, + pdist, + squareform, +) try: from scipy.spatial.distance import wminkowski @@ -15,47 +20,49 @@ # should be used instead. from scipy.spatial.distance import minkowski as wminkowski -from sklearn.utils.fixes import sp_version, parse_version - import pytest from sklearn import config_context - -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings - -from sklearn.metrics.pairwise import euclidean_distances -from sklearn.metrics.pairwise import nan_euclidean_distances -from sklearn.metrics.pairwise import manhattan_distances -from sklearn.metrics.pairwise import haversine_distances -from sklearn.metrics.pairwise import linear_kernel -from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel -from sklearn.metrics.pairwise import polynomial_kernel -from sklearn.metrics.pairwise import rbf_kernel -from sklearn.metrics.pairwise import laplacian_kernel -from sklearn.metrics.pairwise import sigmoid_kernel -from sklearn.metrics.pairwise import cosine_similarity -from sklearn.metrics.pairwise import cosine_distances -from sklearn.metrics.pairwise import pairwise_distances -from sklearn.metrics.pairwise import pairwise_distances_chunked -from sklearn.metrics.pairwise import pairwise_distances_argmin_min -from sklearn.metrics.pairwise import pairwise_distances_argmin -from sklearn.metrics.pairwise import pairwise_kernels -from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS -from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS -from sklearn.metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS -from sklearn.metrics.pairwise import PAIRED_DISTANCES -from sklearn.metrics.pairwise import check_pairwise_arrays -from sklearn.metrics.pairwise import check_paired_arrays -from sklearn.metrics.pairwise import paired_distances -from sklearn.metrics.pairwise import paired_euclidean_distances -from sklearn.metrics.pairwise import paired_manhattan_distances -from sklearn.metrics.pairwise import _euclidean_distances_upcast -from sklearn.preprocessing import normalize from sklearn.exceptions import DataConversionWarning +from sklearn.metrics.pairwise import ( + PAIRED_DISTANCES, + PAIRWISE_BOOLEAN_FUNCTIONS, + PAIRWISE_DISTANCE_FUNCTIONS, + PAIRWISE_KERNEL_FUNCTIONS, + _euclidean_distances_upcast, + additive_chi2_kernel, + check_paired_arrays, + check_pairwise_arrays, + chi2_kernel, + cosine_distances, + cosine_similarity, + euclidean_distances, + haversine_distances, + laplacian_kernel, + linear_kernel, + manhattan_distances, + nan_euclidean_distances, + paired_distances, + paired_euclidean_distances, + paired_manhattan_distances, + pairwise_distances, + pairwise_distances_argmin, + pairwise_distances_argmin_min, + pairwise_distances_chunked, + pairwise_kernels, + polynomial_kernel, + rbf_kernel, + sigmoid_kernel, +) +from sklearn.preprocessing import normalize +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.fixes import parse_version, sp_version def test_pairwise_distances(): diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py 
b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index 192f7ef43a6c6..06aca2dd3c036 100644 --- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py +++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py @@ -1,19 +1,18 @@ import numpy as np import pytest import threadpoolctl -from numpy.testing import assert_array_equal, assert_allclose +from numpy.testing import assert_allclose, assert_array_equal from scipy.sparse import csr_matrix from scipy.spatial.distance import cdist +from sklearn.metrics import euclidean_distances from sklearn.metrics._pairwise_distances_reduction import ( - PairwiseDistancesReduction, PairwiseDistancesArgKmin, PairwiseDistancesRadiusNeighborhood, + PairwiseDistancesReduction, _sqeuclidean_row_norms, ) - -from sklearn.metrics import euclidean_distances -from sklearn.utils.fixes import sp_version, parse_version +from sklearn.utils.fixes import parse_version, sp_version # Common supported metric between scipy.spatial.distance.cdist # and PairwiseDistancesReduction. diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 7d2338337b83d..9315cc8d29a4c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -1,41 +1,44 @@ import re -import pytest -import numpy as np import warnings -from scipy.sparse import csr_matrix -from sklearn import datasets -from sklearn import svm +import numpy as np +import pytest +from scipy.sparse import csr_matrix -from sklearn.utils.extmath import softmax +from sklearn import datasets, svm from sklearn.datasets import make_multilabel_classification -from sklearn.random_projection import _sparse_random_matrix -from sklearn.utils.validation import check_array, check_consistent_length -from sklearn.utils.validation import check_random_state - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - -from sklearn.metrics import accuracy_score -from sklearn.metrics import auc -from sklearn.metrics import average_precision_score -from sklearn.metrics import coverage_error -from sklearn.metrics import det_curve -from sklearn.metrics import label_ranking_average_precision_score -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import label_ranking_loss -from sklearn.metrics import roc_auc_score -from sklearn.metrics import roc_curve -from sklearn.metrics._ranking import _ndcg_sample_scores, _dcg_sample_scores -from sklearn.metrics import ndcg_score, dcg_score -from sklearn.metrics import top_k_accuracy_score - from sklearn.exceptions import UndefinedMetricWarning -from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression - +from sklearn.metrics import ( + accuracy_score, + auc, + average_precision_score, + coverage_error, + dcg_score, + det_curve, + label_ranking_average_precision_score, + label_ranking_loss, + ndcg_score, + precision_recall_curve, + roc_auc_score, + roc_curve, + top_k_accuracy_score, +) +from sklearn.metrics._ranking import _dcg_sample_scores, _ndcg_sample_scores +from sklearn.model_selection import train_test_split +from sklearn.random_projection import _sparse_random_matrix +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) +from sklearn.utils.extmath import softmax +from sklearn.utils.validation 
import ( + check_array, + check_consistent_length, + check_random_state, +) ############################################################################### # Utilities for testing diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 090bc64bf0fe4..f7265aab2e282 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -1,34 +1,36 @@ +from itertools import product + import numpy as np -from scipy import optimize +import pytest from numpy.testing import assert_allclose +from scipy import optimize from scipy.special import factorial, xlogy -from itertools import product -import pytest -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal from sklearn.dummy import DummyRegressor -from sklearn.model_selection import GridSearchCV - -from sklearn.metrics import explained_variance_score -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_squared_error -from sklearn.metrics import mean_squared_log_error -from sklearn.metrics import median_absolute_error -from sklearn.metrics import mean_absolute_percentage_error -from sklearn.metrics import max_error -from sklearn.metrics import mean_pinball_loss -from sklearn.metrics import r2_score -from sklearn.metrics import mean_tweedie_deviance -from sklearn.metrics import d2_tweedie_score -from sklearn.metrics import d2_pinball_score -from sklearn.metrics import d2_absolute_error_score -from sklearn.metrics import make_scorer - -from sklearn.metrics._regression import _check_reg_targets - from sklearn.exceptions import UndefinedMetricWarning +from sklearn.metrics import ( + d2_absolute_error_score, + d2_pinball_score, + d2_tweedie_score, + explained_variance_score, + make_scorer, + max_error, + mean_absolute_error, + mean_absolute_percentage_error, + mean_pinball_loss, + mean_squared_error, + mean_squared_log_error, + mean_tweedie_deviance, + median_absolute_error, + r2_score, +) +from sklearn.metrics._regression import _check_reg_targets +from sklearn.model_selection import GridSearchCV +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def test_regression_metrics(n_samples=50): diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 23680e48ae3e7..fec3c6469fbca 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -1,61 +1,68 @@ -from copy import deepcopy +import numbers +import os import pickle -import tempfile import shutil -import os -import numbers -from unittest.mock import Mock +import tempfile +from copy import deepcopy from functools import partial +from unittest.mock import Mock import numpy as np import pytest -import joblib - from numpy.testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings +import joblib from sklearn.base import BaseEstimator +from sklearn.cluster import KMeans +from sklearn.datasets import ( + load_diabetes, + make_blobs, + make_classification, + make_multilabel_classification, + make_regression, +) +from sklearn.linear_model import LogisticRegression, Perceptron, Ridge from sklearn.metrics import ( + SCORERS, accuracy_score, - balanced_accuracy_score, average_precision_score, + 
balanced_accuracy_score, brier_score_loss, + check_scoring, +) +from sklearn.metrics import cluster as cluster_module +from sklearn.metrics import ( f1_score, fbeta_score, + get_scorer, + get_scorer_names, jaccard_score, log_loss, + make_scorer, + matthews_corrcoef, precision_score, r2_score, recall_score, roc_auc_score, top_k_accuracy_score, - matthews_corrcoef, ) -from sklearn.metrics import cluster as cluster_module -from sklearn.metrics import check_scoring from sklearn.metrics._scorer import ( - _PredictScorer, - _passthrough_scorer, - _MultimetricScorer, _check_multimetric_scoring, + _MultimetricScorer, + _passthrough_scorer, + _PredictScorer, ) -from sklearn.metrics import make_scorer, get_scorer, SCORERS, get_scorer_names +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split +from sklearn.multiclass import OneVsRestClassifier from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import LinearSVC from sklearn.pipeline import make_pipeline -from sklearn.cluster import KMeans -from sklearn.linear_model import Ridge, LogisticRegression, Perceptron +from sklearn.svm import LinearSVC from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.datasets import make_blobs -from sklearn.datasets import make_classification, make_regression -from sklearn.datasets import make_multilabel_classification -from sklearn.datasets import load_diabetes -from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.model_selection import GridSearchCV -from sklearn.multiclass import OneVsRestClassifier - +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_equal, + ignore_warnings, +) REGRESSION_SCORERS = [ "explained_variance", diff --git a/sklearn/mixture/__init__.py b/sklearn/mixture/__init__.py index c5c20aa38eb18..f0018196ffc98 100644 --- a/sklearn/mixture/__init__.py +++ b/sklearn/mixture/__init__.py @@ -2,8 +2,7 @@ The :mod:`sklearn.mixture` module implements mixture modeling algorithms. """ -from ._gaussian_mixture import GaussianMixture from ._bayesian_mixture import BayesianGaussianMixture - +from ._gaussian_mixture import GaussianMixture __all__ = ["GaussianMixture", "BayesianGaussianMixture"] diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index 2edc3b57aa4d2..15d5b01de356e 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -13,9 +13,8 @@ from scipy.special import logsumexp from .. 
import cluster +from ..base import BaseEstimator, DensityMixin from ..cluster import kmeans_plusplus -from ..base import BaseEstimator -from ..base import DensityMixin from ..exceptions import ConvergenceWarning from ..utils import check_random_state, check_scalar from ..utils.validation import check_is_fitted diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index 704c191638863..617a8f944c9f0 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -4,17 +4,20 @@ # License: BSD 3 clause import math + import numpy as np from scipy.special import betaln, digamma, gammaln -from ._base import BaseMixture, _check_shape -from ._gaussian_mixture import _check_precision_matrix -from ._gaussian_mixture import _check_precision_positivity -from ._gaussian_mixture import _compute_log_det_cholesky -from ._gaussian_mixture import _compute_precision_cholesky -from ._gaussian_mixture import _estimate_gaussian_parameters -from ._gaussian_mixture import _estimate_log_gaussian_prob from ..utils import check_array +from ._base import BaseMixture, _check_shape +from ._gaussian_mixture import ( + _check_precision_matrix, + _check_precision_positivity, + _compute_log_det_cholesky, + _compute_precision_cholesky, + _estimate_gaussian_parameters, + _estimate_log_gaussian_prob, +) def _log_dirichlet_norm(dirichlet_concentration): diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index 66a87c6e9c136..73a36e0e5e242 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -5,13 +5,11 @@ # License: BSD 3 clause import numpy as np - from scipy import linalg -from ._base import BaseMixture, _check_shape from ..utils import check_array from ..utils.extmath import row_norms - +from ._base import BaseMixture, _check_shape ############################################################################### # Gaussian mixture shape checkers used by the GaussianMixture class diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index 2cd54aef5b943..0600594550191 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -5,23 +5,19 @@ import re import numpy as np -from scipy.special import gammaln import pytest +from scipy.special import gammaln -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal - +from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics.cluster import adjusted_rand_score - -from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm -from sklearn.mixture._bayesian_mixture import _log_wishart_norm - from sklearn.mixture import BayesianGaussianMixture - +from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm from sklearn.mixture.tests.test_gaussian_mixture import RandomData -from sklearn.exceptions import ConvergenceWarning, NotFittedError -from sklearn.utils._testing import ignore_warnings - +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_equal, + ignore_warnings, +) COVARIANCE_TYPE = ["full", "tied", "diag", "spherical"] PRIOR_TYPE = ["dirichlet_process", "dirichlet_distribution"] diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e9a19c2239f8a..f1e9e29c7efb4 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ 
b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -2,39 +2,40 @@ # Thierry Guillemot # License: BSD 3 clause +import copy import itertools import re import sys -import copy import warnings -import pytest +from io import StringIO import numpy as np -from scipy import stats, linalg +import pytest +from scipy import linalg, stats from sklearn.cluster import KMeans from sklearn.covariance import EmpiricalCovariance from sklearn.datasets import make_spd_matrix -from io import StringIO +from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics.cluster import adjusted_rand_score from sklearn.mixture import GaussianMixture from sklearn.mixture._gaussian_mixture import ( - _estimate_gaussian_covariances_full, - _estimate_gaussian_covariances_tied, + _compute_log_det_cholesky, + _compute_precision_cholesky, _estimate_gaussian_covariances_diag, + _estimate_gaussian_covariances_full, _estimate_gaussian_covariances_spherical, + _estimate_gaussian_covariances_tied, _estimate_gaussian_parameters, - _compute_precision_cholesky, - _compute_log_det_cholesky, ) -from sklearn.exceptions import ConvergenceWarning, NotFittedError +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) from sklearn.utils.extmath import fast_logdet -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings - COVARIANCE_TYPE = ["full", "tied", "diag", "spherical"] diff --git a/sklearn/mixture/tests/test_mixture.py b/sklearn/mixture/tests/test_mixture.py index eeb71d0f89407..f0ea3494f0e7d 100644 --- a/sklearn/mixture/tests/test_mixture.py +++ b/sklearn/mixture/tests/test_mixture.py @@ -1,11 +1,10 @@ # Author: Guillaume Lemaitre # License: BSD 3 clause -import pytest import numpy as np +import pytest -from sklearn.mixture import GaussianMixture -from sklearn.mixture import BayesianGaussianMixture +from sklearn.mixture import BayesianGaussianMixture, GaussianMixture @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()]) diff --git a/sklearn/model_selection/__init__.py b/sklearn/model_selection/__init__.py index a481f5db72fdf..161b66a68edfc 100644 --- a/sklearn/model_selection/__init__.py +++ b/sklearn/model_selection/__init__.py @@ -1,36 +1,35 @@ import typing -from ._split import BaseCrossValidator -from ._split import BaseShuffleSplit -from ._split import KFold -from ._split import GroupKFold -from ._split import StratifiedKFold -from ._split import TimeSeriesSplit -from ._split import LeaveOneGroupOut -from ._split import LeaveOneOut -from ._split import LeavePGroupsOut -from ._split import LeavePOut -from ._split import RepeatedKFold -from ._split import RepeatedStratifiedKFold -from ._split import ShuffleSplit -from ._split import GroupShuffleSplit -from ._split import StratifiedShuffleSplit -from ._split import StratifiedGroupKFold -from ._split import PredefinedSplit -from ._split import train_test_split -from ._split import check_cv - -from ._validation import cross_val_score -from ._validation import cross_val_predict -from ._validation import cross_validate -from ._validation import learning_curve -from ._validation import permutation_test_score -from ._validation import validation_curve - -from ._search import GridSearchCV -from ._search import 
RandomizedSearchCV -from ._search import ParameterGrid -from ._search import ParameterSampler +from ._search import GridSearchCV, ParameterGrid, ParameterSampler, RandomizedSearchCV +from ._split import ( + BaseCrossValidator, + BaseShuffleSplit, + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeaveOneOut, + LeavePGroupsOut, + LeavePOut, + PredefinedSplit, + RepeatedKFold, + RepeatedStratifiedKFold, + ShuffleSplit, + StratifiedGroupKFold, + StratifiedKFold, + StratifiedShuffleSplit, + TimeSeriesSplit, + check_cv, + train_test_split, +) +from ._validation import ( + cross_val_predict, + cross_val_score, + cross_validate, + learning_curve, + permutation_test_score, + validation_curve, +) if typing.TYPE_CHECKING: # Avoid errors in type checkers (e.g. mypy) for experimental estimators. diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 5ceb71569b932..f3ab5c40f82fe 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -10,38 +10,40 @@ # Raghav RV # License: BSD 3 clause -from abc import ABCMeta, abstractmethod -from collections import defaultdict -from collections.abc import Mapping, Sequence, Iterable -from functools import partial, reduce -from itertools import product import numbers import operator import time import warnings +from abc import ABCMeta, abstractmethod +from collections import defaultdict +from collections.abc import Iterable, Mapping, Sequence +from functools import partial, reduce +from itertools import product import numpy as np from numpy.ma import MaskedArray from scipy.stats import rankdata -from ..base import BaseEstimator, is_classifier, clone -from ..base import MetaEstimatorMixin -from ._split import check_cv -from ._validation import _fit_and_score -from ._validation import _aggregate_score_dicts -from ._validation import _insert_error_scores -from ._validation import _normalize_score_results -from ._validation import _warn_or_raise_about_fit_failures -from ..exceptions import NotFittedError from joblib import Parallel + +from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier +from ..exceptions import NotFittedError +from ..metrics import check_scoring +from ..metrics._scorer import _check_multimetric_scoring from ..utils import check_random_state -from ..utils.random import sample_without_replacement from ..utils._tags import _safe_tags -from ..utils.validation import indexable, check_is_fitted, _check_fit_params -from ..utils.metaestimators import available_if from ..utils.fixes import delayed -from ..metrics._scorer import _check_multimetric_scoring -from ..metrics import check_scoring +from ..utils.metaestimators import available_if +from ..utils.random import sample_without_replacement +from ..utils.validation import _check_fit_params, check_is_fitted, indexable +from ._split import check_cv +from ._validation import ( + _aggregate_score_dicts, + _fit_and_score, + _insert_error_scores, + _normalize_score_results, + _warn_or_raise_about_fit_failures, +) __all__ = ["GridSearchCV", "ParameterGrid", "ParameterSampler", "RandomizedSearchCV"] diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 940c4c93831f5..62aa157c9ace5 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -1,17 +1,17 @@ +from abc import abstractmethod from copy import deepcopy from math import ceil, floor, log -from abc import 
abstractmethod from numbers import Integral import numpy as np -from ._search import BaseSearchCV -from . import ParameterGrid, ParameterSampler + from ..base import is_classifier -from ._split import check_cv, _yields_constant_splits from ..utils import resample from ..utils.multiclass import check_classification_targets from ..utils.validation import _num_samples - +from . import ParameterGrid, ParameterSampler +from ._search import BaseSearchCV +from ._split import _yields_constant_splits, check_cv __all__ = ["HalvingGridSearchCV", "HalvingRandomSearchCV"] diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index d2a0b5e1fc329..08ca4de3c7457 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -11,24 +11,22 @@ # Rodion Martynov # License: BSD 3 clause -from collections.abc import Iterable -from collections import defaultdict -import warnings -from itertools import chain, combinations -from math import ceil, floor import numbers +import warnings from abc import ABCMeta, abstractmethod +from collections import defaultdict +from collections.abc import Iterable from inspect import signature +from itertools import chain, combinations +from math import ceil, floor import numpy as np from scipy.special import comb -from ..utils import indexable, check_random_state, _safe_indexing -from ..utils import _approximate_mode -from ..utils.validation import _num_samples, column_or_1d -from ..utils.validation import check_array -from ..utils.multiclass import type_of_target from ..base import _pprint +from ..utils import _approximate_mode, _safe_indexing, check_random_state, indexable +from ..utils.multiclass import type_of_target +from ..utils.validation import _num_samples, check_array, column_or_1d __all__ = [ "BaseCrossValidator", diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index d83fca63da48c..3c03cdfc44873 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -11,30 +11,29 @@ # License: BSD 3 clause -import warnings import numbers import time +import warnings +from collections import Counter +from contextlib import suppress from functools import partial from traceback import format_exc -from contextlib import suppress -from collections import Counter import numpy as np import scipy.sparse as sp + from joblib import Parallel, logger -from ..base import is_classifier, clone -from ..utils import indexable, check_random_state, _safe_indexing -from ..utils.validation import _check_fit_params -from ..utils.validation import _num_samples -from ..utils.fixes import delayed -from ..utils.metaestimators import _safe_split +from ..base import clone, is_classifier +from ..exceptions import FitFailedWarning from ..metrics import check_scoring from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer -from ..exceptions import FitFailedWarning -from ._split import check_cv from ..preprocessing import LabelEncoder - +from ..utils import _safe_indexing, check_random_state, indexable +from ..utils.fixes import delayed +from ..utils.metaestimators import _safe_split +from ..utils.validation import _check_fit_params, _num_samples +from ._split import check_cv __all__ = [ "cross_validate", diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index e1466d69d3902..c109f2915aa10 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ 
-1,75 +1,71 @@ """Test the search module""" +import pickle +import re +import sys from collections.abc import Iterable, Sized +from functools import partial from io import StringIO from itertools import chain, product -from functools import partial -import pickle -import sys from types import GeneratorType -import re import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp +from scipy.stats import bernoulli, expon, uniform +from sklearn.base import BaseEstimator, ClassifierMixin, is_classifier +from sklearn.cluster import KMeans +from sklearn.datasets import ( + make_blobs, + make_classification, + make_multilabel_classification, +) +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.impute import SimpleImputer +from sklearn.linear_model import LinearRegression, Ridge, SGDClassifier +from sklearn.metrics import ( + accuracy_score, + confusion_matrix, + f1_score, + make_scorer, + r2_score, + recall_score, + roc_auc_score, +) +from sklearn.metrics.pairwise import euclidean_distances +from sklearn.model_selection import ( + GridSearchCV, + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeavePGroupsOut, + ParameterGrid, + ParameterSampler, + RandomizedSearchCV, + StratifiedKFold, + StratifiedShuffleSplit, + train_test_split, +) +from sklearn.model_selection._search import BaseSearchCV +from sklearn.model_selection._validation import FitFailedWarning +from sklearn.model_selection.tests.common import OneTimeSplitter +from sklearn.neighbors import KernelDensity, KNeighborsClassifier, LocalOutlierFactor +from sklearn.pipeline import Pipeline +from sklearn.svm import SVC, LinearSVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils._mocking import CheckingClassifier, MockDataFrame from sklearn.utils._testing import ( - assert_array_equal, - assert_array_almost_equal, - assert_allclose, - assert_almost_equal, - ignore_warnings, MinimalClassifier, MinimalRegressor, MinimalTransformer, + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, ) -from sklearn.utils._mocking import CheckingClassifier, MockDataFrame - -from scipy.stats import bernoulli, expon, uniform - -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.base import is_classifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_blobs -from sklearn.datasets import make_multilabel_classification - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import KFold -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import RandomizedSearchCV -from sklearn.model_selection import ParameterGrid -from sklearn.model_selection import ParameterSampler -from sklearn.model_selection._search import BaseSearchCV - -from sklearn.model_selection._validation import FitFailedWarning - -from sklearn.svm import LinearSVC, SVC -from sklearn.tree import DecisionTreeRegressor -from sklearn.tree import DecisionTreeClassifier -from sklearn.cluster import KMeans -from sklearn.neighbors import KernelDensity -from sklearn.neighbors import LocalOutlierFactor -from 
sklearn.neighbors import KNeighborsClassifier -from sklearn.metrics import f1_score -from sklearn.metrics import recall_score -from sklearn.metrics import accuracy_score -from sklearn.metrics import make_scorer -from sklearn.metrics import roc_auc_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import r2_score -from sklearn.metrics.pairwise import euclidean_distances -from sklearn.impute import SimpleImputer -from sklearn.pipeline import Pipeline -from sklearn.linear_model import Ridge, SGDClassifier, LinearRegression -from sklearn.ensemble import HistGradientBoostingClassifier - -from sklearn.model_selection.tests.common import OneTimeSplitter # Neither of the following two estimators inherit from BaseEstimator, @@ -785,7 +781,7 @@ def test_pandas_input(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((DataFrame, Series)) except ImportError: diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index f502ebc8a3b6a..58abeb148fdfc 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -1,52 +1,51 @@ """Test the split module""" -import warnings -import pytest import re +import warnings +from itertools import combinations, combinations_with_replacement, permutations + import numpy as np -from scipy.sparse import coo_matrix, csc_matrix, csr_matrix +import pytest from scipy import stats +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix from scipy.special import comb -from itertools import combinations -from itertools import combinations_with_replacement -from itertools import permutations - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.validation import _num_samples -from sklearn.utils._mocking import MockDataFrame - -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import KFold -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import TimeSeriesSplit -from sklearn.model_selection import LeaveOneOut -from sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import ShuffleSplit -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.model_selection import PredefinedSplit -from sklearn.model_selection import check_cv -from sklearn.model_selection import train_test_split -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import RepeatedKFold -from sklearn.model_selection import RepeatedStratifiedKFold -from sklearn.model_selection import StratifiedGroupKFold +from sklearn.datasets import load_digits, make_classification from sklearn.dummy import DummyClassifier - -from sklearn.model_selection._split import _validate_shuffle_split -from sklearn.model_selection._split import _build_repr -from sklearn.model_selection._split import _yields_constant_splits - -from sklearn.datasets import load_digits -from sklearn.datasets import make_classification - +from sklearn.model_selection import ( + 
GridSearchCV, + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeaveOneOut, + LeavePGroupsOut, + LeavePOut, + PredefinedSplit, + RepeatedKFold, + RepeatedStratifiedKFold, + ShuffleSplit, + StratifiedGroupKFold, + StratifiedKFold, + StratifiedShuffleSplit, + TimeSeriesSplit, + check_cv, + cross_val_score, + train_test_split, +) +from sklearn.model_selection._split import ( + _build_repr, + _validate_shuffle_split, + _yields_constant_splits, +) from sklearn.svm import SVC +from sklearn.utils._mocking import MockDataFrame +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.validation import _num_samples X = np.ones(10) y = np.arange(10) // 2 diff --git a/sklearn/model_selection/tests/test_successive_halving.py b/sklearn/model_selection/tests/test_successive_halving.py index fe06957f5deed..93502b403fbcf 100644 --- a/sklearn/model_selection/tests/test_successive_halving.py +++ b/sklearn/model_selection/tests/test_successive_halving.py @@ -1,26 +1,29 @@ from math import ceil +import numpy as np import pytest from scipy.stats import norm, randint -import numpy as np from sklearn.datasets import make_classification from sklearn.dummy import DummyClassifier from sklearn.experimental import enable_halving_search_cv # noqa -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import HalvingGridSearchCV -from sklearn.model_selection import HalvingRandomSearchCV -from sklearn.model_selection import KFold, ShuffleSplit -from sklearn.svm import LinearSVC +from sklearn.model_selection import ( + GroupKFold, + GroupShuffleSplit, + HalvingGridSearchCV, + HalvingRandomSearchCV, + KFold, + LeaveOneGroupOut, + LeavePGroupsOut, + ShuffleSplit, + StratifiedKFold, + StratifiedShuffleSplit, +) from sklearn.model_selection._search_successive_halving import ( _SubsampleMetaSplitter, _top_k, ) +from sklearn.svm import LinearSVC class FastClassifier(DummyClassifier): diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 90b5a605ac2e4..5fedf3b802876 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -5,78 +5,83 @@ import tempfile import warnings from functools import partial +from io import StringIO from time import sleep -import pytest import numpy as np +import pytest from scipy.sparse import coo_matrix, csr_matrix -from sklearn.exceptions import FitFailedWarning - -from sklearn.model_selection.tests.test_search import FailingClassifier - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._mocking import CheckingClassifier, MockDataFrame - -from sklearn.utils.validation import _num_samples -from sklearn.model_selection import cross_val_score, ShuffleSplit -from sklearn.model_selection import cross_val_predict -from sklearn.model_selection import cross_validate -from sklearn.model_selection import permutation_test_score -from sklearn.model_selection import KFold -from 
sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import LeaveOneOut -from sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import learning_curve -from sklearn.model_selection import validation_curve -from sklearn.model_selection._validation import _check_is_permutation -from sklearn.model_selection._validation import _fit_and_score -from sklearn.model_selection._validation import _score - -from sklearn.datasets import make_regression -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import load_digits -from sklearn.metrics import explained_variance_score -from sklearn.metrics import make_scorer -from sklearn.metrics import accuracy_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import precision_recall_fscore_support -from sklearn.metrics import precision_score -from sklearn.metrics import r2_score -from sklearn.metrics import mean_squared_error -from sklearn.metrics import check_scoring - -from sklearn.linear_model import Ridge, LogisticRegression, SGDClassifier -from sklearn.linear_model import PassiveAggressiveClassifier, RidgeClassifier +from sklearn.base import BaseEstimator, clone +from sklearn.cluster import KMeans +from sklearn.datasets import ( + load_diabetes, + load_digits, + load_iris, + make_classification, + make_multilabel_classification, + make_regression, +) from sklearn.ensemble import RandomForestClassifier +from sklearn.exceptions import FitFailedWarning +from sklearn.impute import SimpleImputer +from sklearn.linear_model import ( + LogisticRegression, + PassiveAggressiveClassifier, + Ridge, + RidgeClassifier, + SGDClassifier, +) +from sklearn.metrics import ( + accuracy_score, + check_scoring, + confusion_matrix, + explained_variance_score, + make_scorer, + mean_squared_error, + precision_recall_fscore_support, + precision_score, + r2_score, +) +from sklearn.model_selection import ( + GridSearchCV, + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeaveOneOut, + LeavePGroupsOut, + ShuffleSplit, + StratifiedKFold, + cross_val_predict, + cross_val_score, + cross_validate, + learning_curve, + permutation_test_score, + validation_curve, +) +from sklearn.model_selection._validation import ( + _check_is_permutation, + _fit_and_score, + _score, +) +from sklearn.model_selection.tests.common import OneTimeSplitter +from sklearn.model_selection.tests.test_search import FailingClassifier +from sklearn.multiclass import OneVsRestClassifier from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC, LinearSVC -from sklearn.cluster import KMeans from sklearn.neural_network import MLPRegressor - -from sklearn.impute import SimpleImputer - -from sklearn.preprocessing import LabelEncoder from sklearn.pipeline import Pipeline - -from io import StringIO -from sklearn.base import BaseEstimator -from sklearn.base import clone -from sklearn.multiclass import OneVsRestClassifier +from sklearn.preprocessing import LabelEncoder +from sklearn.svm import SVC, LinearSVC from sklearn.utils import shuffle -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification - -from sklearn.model_selection.tests.common import OneTimeSplitter -from sklearn.model_selection import GridSearchCV - +from sklearn.utils._mocking import 
CheckingClassifier, MockDataFrame +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) +from sklearn.utils.validation import _num_samples try: WindowsError @@ -600,7 +605,7 @@ def test_cross_val_score_pandas(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: @@ -1101,7 +1106,7 @@ def test_cross_val_predict_pandas(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: @@ -2058,7 +2063,7 @@ def test_permutation_test_score_pandas(): # check permutation_test_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index b46b4bfb8b5ef..f71cc0cdaf1ce 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -34,29 +34,35 @@ # License: BSD 3 clause import array -import numpy as np +import itertools import warnings + +import numpy as np import scipy.sparse as sp -import itertools -from .base import BaseEstimator, ClassifierMixin, clone, is_classifier -from .base import MultiOutputMixin -from .base import MetaEstimatorMixin, is_regressor -from .preprocessing import LabelBinarizer +from joblib import Parallel + +from .base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + MultiOutputMixin, + clone, + is_classifier, + is_regressor, +) from .metrics.pairwise import euclidean_distances +from .preprocessing import LabelBinarizer from .utils import check_random_state from .utils._tags import _safe_tags -from .utils.validation import _num_samples -from .utils.validation import check_is_fitted +from .utils.fixes import delayed +from .utils.metaestimators import _safe_split, available_if from .utils.multiclass import ( _check_partial_fit_first_call, - check_classification_targets, _ovr_decision_function, + check_classification_targets, ) -from .utils.metaestimators import _safe_split, available_if -from .utils.fixes import delayed - -from joblib import Parallel +from .utils.validation import _num_samples, check_is_fitted __all__ = [ "OneVsRestClassifier", diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 24e4cc8dda7e8..a7fbb6b3eec94 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -14,19 +14,27 @@ # # License: BSD 3 clause +from abc import ABCMeta, abstractmethod + import numpy as np import scipy.sparse as sp + from joblib import Parallel -from abc import ABCMeta, abstractmethod -from .base import BaseEstimator, clone, MetaEstimatorMixin -from .base import RegressorMixin, ClassifierMixin, is_classifier +from .base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + RegressorMixin, + clone, + is_classifier, +) from .model_selection import cross_val_predict -from .utils.metaestimators import available_if from .utils import check_random_state -from .utils.validation import check_is_fitted, has_fit_parameter, _check_fit_params -from .utils.multiclass import check_classification_targets from .utils.fixes import delayed +from .utils.metaestimators import available_if +from 
.utils.multiclass import check_classification_targets +from .utils.validation import _check_fit_params, check_is_fitted, has_fit_parameter __all__ = [ "MultiOutputRegressor", diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index ca7be2d3799a3..33a150420eaaa 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -14,23 +14,17 @@ # # License: BSD 3 clause import warnings - from abc import ABCMeta, abstractmethod - import numpy as np from scipy.special import logsumexp from .base import BaseEstimator, ClassifierMixin -from .preprocessing import binarize -from .preprocessing import LabelBinarizer -from .preprocessing import label_binarize +from .preprocessing import LabelBinarizer, binarize, label_binarize from .utils import deprecated from .utils.extmath import safe_sparse_dot from .utils.multiclass import _check_partial_fit_first_call -from .utils.validation import check_is_fitted, check_non_negative -from .utils.validation import _check_sample_weight - +from .utils.validation import _check_sample_weight, check_is_fitted, check_non_negative __all__ = [ "BernoulliNB", diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index 12824e9cb684e..a01d460b8885d 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -4,19 +4,22 @@ """ from ._ball_tree import BallTree -from ._kd_tree import KDTree -from ._distance_metric import DistanceMetric -from ._graph import kneighbors_graph, radius_neighbors_graph -from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer -from ._unsupervised import NearestNeighbors +from ._base import VALID_METRICS, VALID_METRICS_SPARSE, sort_graph_by_row_values from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier -from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor -from ._nearest_centroid import NearestCentroid +from ._distance_metric import DistanceMetric +from ._graph import ( + KNeighborsTransformer, + RadiusNeighborsTransformer, + kneighbors_graph, + radius_neighbors_graph, +) +from ._kd_tree import KDTree from ._kde import KernelDensity from ._lof import LocalOutlierFactor from ._nca import NeighborhoodComponentsAnalysis -from ._base import sort_graph_by_row_values -from ._base import VALID_METRICS, VALID_METRICS_SPARSE +from ._nearest_centroid import NearestCentroid +from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor +from ._unsupervised import NearestNeighbors __all__ = [ "BallTree", diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index 8ecbaa351647c..7ad28fa5dd358 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -6,37 +6,30 @@ # Multi-output support by Arnaud Joly # # License: BSD 3 clause (C) INRIA, University of Amsterdam -from functools import partial - +import numbers import warnings from abc import ABCMeta, abstractmethod -import numbers +from functools import partial import numpy as np from scipy.sparse import csr_matrix, issparse + from joblib import Parallel, effective_n_jobs -from ._ball_tree import BallTree -from ._kd_tree import KDTree -from ..base import BaseEstimator, MultiOutputMixin -from ..base import is_classifier +from ..base import BaseEstimator, MultiOutputMixin, is_classifier +from ..exceptions import DataConversionWarning, EfficiencyWarning from ..metrics import pairwise_distances_chunked -from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..metrics._pairwise_distances_reduction import ( PairwiseDistancesArgKmin, 
PairwiseDistancesRadiusNeighborhood, ) -from ..utils import ( - check_array, - gen_even_slices, - _to_object_array, -) +from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS +from ..utils import _to_object_array, check_array, gen_even_slices +from ..utils.fixes import delayed, parse_version, sp_version from ..utils.multiclass import check_classification_targets -from ..utils.validation import check_is_fitted -from ..utils.validation import check_non_negative -from ..utils.fixes import delayed, sp_version -from ..utils.fixes import parse_version -from ..exceptions import DataConversionWarning, EfficiencyWarning +from ..utils.validation import check_is_fitted, check_non_negative +from ._ball_tree import BallTree +from ._kd_tree import KDTree VALID_METRICS = dict( ball_tree=BallTree.valid_metrics, diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi index 36781a770906c..06fdb51cf42e9 100644 --- a/sklearn/neighbors/_binary_tree.pxi +++ b/sklearn/neighbors/_binary_tree.pxi @@ -143,29 +143,32 @@ # """Compute the maximum distance between two nodes""" cimport numpy as np -from libc.math cimport fabs, sqrt, exp, cos, pow, log, lgamma -from libc.math cimport fmin, fmax -from libc.stdlib cimport calloc, malloc, free +from libc.math cimport cos, exp, fabs, fmax, fmin, lgamma, log, pow, sqrt +from libc.stdlib cimport calloc, free, malloc from libc.string cimport memcpy -import numpy as np import warnings +import numpy as np + from ..metrics._dist_metrics cimport ( DistanceMetric, euclidean_dist, - euclidean_rdist, euclidean_dist_to_rdist, + euclidean_rdist, ) - from ._partition_nodes cimport partition_node_indices from ..utils import check_array + from ..utils._typedefs cimport DTYPE_t, ITYPE_t + from ..utils._typedefs import DTYPE, ITYPE + from ..utils._heap cimport heap_push from ..utils._sorting cimport simultaneous_sort as _simultaneous_sort + cdef extern from "numpy/arrayobject.h": void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) @@ -759,7 +762,9 @@ def newObj(obj): ###################################################################### # define the reverse mapping of VALID_METRICS + from sklearn.metrics._dist_metrics import get_valid_metric_ids + VALID_METRIC_IDS = get_valid_metric_ids(VALID_METRICS) diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index bcad8c71aee07..35248b3b3846a 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -8,15 +8,21 @@ # # License: BSD 3 clause (C) INRIA, University of Amsterdam +import warnings + import numpy as np from scipy import stats -from ..utils.extmath import weighted_mode -from ..utils.validation import _is_arraylike, _num_samples -import warnings -from ._base import _check_weights, _get_weights -from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin from ..base import ClassifierMixin +from ..utils.extmath import weighted_mode +from ..utils.validation import _is_arraylike, _num_samples +from ._base import ( + KNeighborsMixin, + NeighborsBase, + RadiusNeighborsMixin, + _check_weights, + _get_weights, +) class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase): diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index 2be70c0638517..09df0073009b9 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -4,11 +4,10 @@ # Tom Dupre la Tour # # License: BSD 3 clause (C) INRIA, University of Amsterdam -from ._base import KNeighborsMixin, RadiusNeighborsMixin -from 
._base import NeighborsBase -from ._unsupervised import NearestNeighbors from ..base import TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ..utils.validation import check_is_fitted +from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin +from ._unsupervised import NearestNeighbors def _check_params(X, metric, p, metric_params): diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index b707674993755..41f1849c0bf3b 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -5,17 +5,17 @@ # Author: Jake Vanderplas import numbers + import numpy as np from scipy.special import gammainc + from ..base import BaseEstimator from ..utils import check_random_state, check_scalar -from ..utils.validation import _check_sample_weight, check_is_fitted - from ..utils.extmath import row_norms -from ._ball_tree import BallTree, DTYPE +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._ball_tree import DTYPE, BallTree from ._kd_tree import KDTree - VALID_KERNELS = [ "gaussian", "tophat", diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py index 025a1c6d80768..7d7ced17378b0 100644 --- a/sklearn/neighbors/_lof.py +++ b/sklearn/neighbors/_lof.py @@ -2,16 +2,15 @@ # Alexandre Gramfort # License: BSD 3 clause -import numpy as np import warnings -from ._base import NeighborsBase -from ._base import KNeighborsMixin -from ..base import OutlierMixin +import numpy as np +from ..base import OutlierMixin +from ..utils import check_array from ..utils.metaestimators import available_if from ..utils.validation import check_is_fitted -from ..utils import check_array +from ._base import KNeighborsMixin, NeighborsBase __all__ = ["LocalOutlierFactor"] diff --git a/sklearn/neighbors/_nca.py b/sklearn/neighbors/_nca.py index af76a000ef2cb..2d5696c31d991 100644 --- a/sklearn/neighbors/_nca.py +++ b/sklearn/neighbors/_nca.py @@ -6,21 +6,23 @@ # John Chiotellis # License: BSD 3 clause -from warnings import warn -import numpy as np +import numbers import sys import time -import numbers +from warnings import warn + +import numpy as np from scipy.optimize import minimize -from ..utils.extmath import softmax -from ..metrics import pairwise_distances + from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin -from ..preprocessing import LabelEncoder from ..decomposition import PCA +from ..exceptions import ConvergenceWarning +from ..metrics import pairwise_distances +from ..preprocessing import LabelEncoder +from ..utils.extmath import softmax from ..utils.multiclass import check_classification_targets from ..utils.random import check_random_state -from ..utils.validation import check_is_fitted, check_array, check_scalar -from ..exceptions import ConvergenceWarning +from ..utils.validation import check_array, check_is_fitted, check_scalar class NeighborhoodComponentsAnalysis( diff --git a/sklearn/neighbors/_nearest_centroid.py b/sklearn/neighbors/_nearest_centroid.py index b52d9407333a6..f90f04813a6b1 100644 --- a/sklearn/neighbors/_nearest_centroid.py +++ b/sklearn/neighbors/_nearest_centroid.py @@ -8,15 +8,16 @@ # License: BSD 3 clause import warnings + import numpy as np from scipy import sparse as sp from ..base import BaseEstimator, ClassifierMixin from ..metrics.pairwise import pairwise_distances from ..preprocessing import LabelEncoder -from ..utils.validation import check_is_fitted -from ..utils.sparsefuncs import csc_median_axis_0 from ..utils.multiclass import check_classification_targets +from 
..utils.sparsefuncs import csc_median_axis_0 +from ..utils.validation import check_is_fitted class NearestCentroid(ClassifierMixin, BaseEstimator): diff --git a/sklearn/neighbors/_partition_nodes.pxd b/sklearn/neighbors/_partition_nodes.pxd index 94b02002d7a1e..d53989198c680 100644 --- a/sklearn/neighbors/_partition_nodes.pxd +++ b/sklearn/neighbors/_partition_nodes.pxd @@ -1,5 +1,6 @@ from ..utils._typedefs cimport DTYPE_t, ITYPE_t + cdef int partition_node_indices( DTYPE_t *data, ITYPE_t *node_indices, diff --git a/sklearn/neighbors/_quad_tree.pyx b/sklearn/neighbors/_quad_tree.pyx index 6af7d1f547303..be68dc77cc21b 100644 --- a/sklearn/neighbors/_quad_tree.pyx +++ b/sklearn/neighbors/_quad_tree.pyx @@ -3,17 +3,19 @@ from cpython cimport Py_INCREF, PyObject, PyTypeObject - -from libc.stdlib cimport malloc, free -from libc.string cimport memcpy -from libc.stdio cimport printf from libc.stdint cimport SIZE_MAX +from libc.stdio cimport printf +from libc.stdlib cimport free, malloc +from libc.string cimport memcpy from ..tree._utils cimport safe_realloc, sizet_ptr_to_ndarray -from ..utils import check_array import numpy as np + +from ..utils import check_array + cimport numpy as np + np.import_array() cdef extern from "math.h": diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py index 4c995e5062277..fb837ae99c38e 100644 --- a/sklearn/neighbors/_regression.py +++ b/sklearn/neighbors/_regression.py @@ -14,9 +14,14 @@ import numpy as np -from ._base import _get_weights, _check_weights -from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin from ..base import RegressorMixin +from ._base import ( + KNeighborsMixin, + NeighborsBase, + RadiusNeighborsMixin, + _check_weights, + _get_weights, +) class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase): diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index 6399363112378..a57ba9454cb1c 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -1,7 +1,5 @@ """Unsupervised nearest neighbors learner""" -from ._base import NeighborsBase -from ._base import KNeighborsMixin -from ._base import RadiusNeighborsMixin +from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin, NeighborsBase): diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index d5046afd2da2a..b232a02ebeb3d 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -3,10 +3,11 @@ import numpy as np import pytest from numpy.testing import assert_array_almost_equal + from sklearn.neighbors._ball_tree import BallTree from sklearn.utils import check_random_state -from sklearn.utils.validation import check_array from sklearn.utils._testing import _convert_container +from sklearn.utils.validation import check_array rng = np.random.RandomState(10) V_mahalanobis = rng.rand(3, 3) diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py index d8d9437636d1d..7c368982edbd0 100644 --- a/sklearn/neighbors/tests/test_kd_tree.py +++ b/sklearn/neighbors/tests/test_kd_tree.py @@ -1,9 +1,9 @@ import numpy as np import pytest -from joblib import Parallel -from sklearn.utils.fixes import delayed +from joblib import Parallel from sklearn.neighbors._kd_tree import KDTree +from sklearn.utils.fixes import delayed DIMENSION = 3 diff --git a/sklearn/neighbors/tests/test_kde.py 
b/sklearn/neighbors/tests/test_kde.py index 8cbc613140720..74d1d813d4086 100644 --- a/sklearn/neighbors/tests/test_kde.py +++ b/sklearn/neighbors/tests/test_kde.py @@ -1,16 +1,15 @@ import numpy as np - import pytest -from sklearn.utils._testing import assert_allclose -from sklearn.neighbors import KernelDensity, KDTree, NearestNeighbors -from sklearn.neighbors._ball_tree import kernel_norm -from sklearn.pipeline import make_pipeline +import joblib from sklearn.datasets import make_blobs +from sklearn.exceptions import NotFittedError from sklearn.model_selection import GridSearchCV +from sklearn.neighbors import KDTree, KernelDensity, NearestNeighbors +from sklearn.neighbors._ball_tree import kernel_norm +from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler -from sklearn.exceptions import NotFittedError -import joblib +from sklearn.utils._testing import assert_allclose # XXX Duplicated in test_neighbors_tree, test_kde diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py index e4b79c8f06668..3c1698c56a075 100644 --- a/sklearn/neighbors/tests/test_lof.py +++ b/sklearn/neighbors/tests/test_lof.py @@ -2,24 +2,22 @@ # Alexandre Gramfort # License: BSD 3 clause +import re from math import sqrt import numpy as np -from sklearn import neighbors -import re import pytest from numpy.testing import assert_array_equal -from sklearn import metrics +from sklearn import metrics, neighbors +from sklearn.datasets import load_iris from sklearn.metrics import roc_auc_score - from sklearn.utils import check_random_state from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils.estimator_checks import check_outlier_corruption -from sklearn.utils.estimator_checks import parametrize_with_checks - -from sklearn.datasets import load_iris - +from sklearn.utils.estimator_checks import ( + check_outlier_corruption, + parametrize_with_checks, +) # load the iris dataset # and randomly permute it diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index f6fdb343ccdf2..2504c77486441 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -6,18 +6,19 @@ # John Chiotellis # License: BSD 3 clause -import pytest import re + import numpy as np -from numpy.testing import assert_array_equal, assert_array_almost_equal +import pytest +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy.optimize import check_grad + from sklearn import clone +from sklearn.datasets import load_iris, make_blobs, make_classification from sklearn.exceptions import ConvergenceWarning -from sklearn.utils import check_random_state -from sklearn.datasets import load_iris, make_classification, make_blobs -from sklearn.neighbors import NeighborhoodComponentsAnalysis from sklearn.metrics import pairwise_distances - +from sklearn.neighbors import NeighborhoodComponentsAnalysis +from sklearn.utils import check_random_state rng = check_random_state(0) # load and shuffle iris dataset diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index c762b8390ed63..a0b0189c992a3 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -3,11 +3,11 @@ """ import numpy as np import pytest -from scipy import sparse as sp from numpy.testing import assert_array_equal +from scipy import sparse as sp -from sklearn.neighbors import NearestCentroid from sklearn 
import datasets +from sklearn.neighbors import NearestCentroid # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 337e777191475..6bdfea276f335 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1,58 +1,42 @@ -from itertools import product -from contextlib import nullcontext +import re import warnings +from contextlib import nullcontext +from itertools import product -import pytest -import re import numpy as np +import pytest from scipy.sparse import ( bsr_matrix, coo_matrix, csc_matrix, csr_matrix, - dok_matrix, dia_matrix, - lil_matrix, + dok_matrix, issparse, + lil_matrix, ) -from sklearn import ( - config_context, - datasets, - metrics, - neighbors, -) +import joblib +from sklearn import config_context, datasets, metrics, neighbors from sklearn.base import clone -from sklearn.exceptions import DataConversionWarning -from sklearn.exceptions import EfficiencyWarning -from sklearn.exceptions import NotFittedError +from sklearn.exceptions import DataConversionWarning, EfficiencyWarning, NotFittedError from sklearn.metrics.pairwise import pairwise_distances from sklearn.metrics.tests.test_dist_metrics import BOOL_METRICS from sklearn.metrics.tests.test_pairwise_distances_reduction import ( assert_radius_neighborhood_results_equality, ) -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import train_test_split -from sklearn.neighbors import ( - VALID_METRICS_SPARSE, - KNeighborsRegressor, -) +from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.neighbors import VALID_METRICS_SPARSE, KNeighborsRegressor from sklearn.neighbors._base import ( - _is_sorted_by_data, + KNeighborsMixin, _check_precomputed, + _is_sorted_by_data, sort_graph_by_row_values, - KNeighborsMixin, ) from sklearn.pipeline import make_pipeline -from sklearn.utils._testing import ( - assert_allclose, - assert_array_equal, -) -from sklearn.utils._testing import ignore_warnings +from sklearn.utils._testing import assert_allclose, assert_array_equal, ignore_warnings +from sklearn.utils.fixes import parse_version, sp_version from sklearn.utils.validation import check_random_state -from sklearn.utils.fixes import sp_version, parse_version - -import joblib rng = np.random.RandomState(0) # load and shuffle iris dataset @@ -2136,8 +2120,8 @@ def test_auto_algorithm(X, metric, metric_params, expected_algo): # TODO: Remove in 1.3 def test_neighbors_distance_metric_deprecation(): - from sklearn.neighbors import DistanceMetric from sklearn.metrics import DistanceMetric as ActualDistanceMetric + from sklearn.neighbors import DistanceMetric msg = r"This import path will be removed in 1\.3" with pytest.warns(FutureWarning, match=msg): diff --git a/sklearn/neighbors/tests/test_neighbors_pipeline.py b/sklearn/neighbors/tests/test_neighbors_pipeline.py index df56e64f5770b..dc4d59fbc2813 100644 --- a/sklearn/neighbors/tests/test_neighbors_pipeline.py +++ b/sklearn/neighbors/tests/test_neighbors_pipeline.py @@ -8,23 +8,20 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_array_almost_equal +from sklearn.base import clone +from sklearn.cluster import DBSCAN, SpectralClustering from sklearn.cluster.tests.common import generate_clustered_data from sklearn.datasets import make_blobs +from sklearn.manifold import TSNE, Isomap, SpectralEmbedding +from sklearn.neighbors import ( + 
KNeighborsRegressor, + KNeighborsTransformer, + LocalOutlierFactor, + RadiusNeighborsRegressor, + RadiusNeighborsTransformer, +) from sklearn.pipeline import make_pipeline -from sklearn.base import clone - -from sklearn.neighbors import KNeighborsTransformer -from sklearn.neighbors import RadiusNeighborsTransformer - -from sklearn.cluster import DBSCAN -from sklearn.cluster import SpectralClustering -from sklearn.neighbors import KNeighborsRegressor -from sklearn.neighbors import RadiusNeighborsRegressor -from sklearn.neighbors import LocalOutlierFactor -from sklearn.manifold import SpectralEmbedding -from sklearn.manifold import Isomap -from sklearn.manifold import TSNE +from sklearn.utils._testing import assert_array_almost_equal def test_spectral_clustering(): diff --git a/sklearn/neighbors/tests/test_neighbors_tree.py b/sklearn/neighbors/tests/test_neighbors_tree.py index 85d578c271faa..5a05fb1a51e7b 100644 --- a/sklearn/neighbors/tests/test_neighbors_tree.py +++ b/sklearn/neighbors/tests/test_neighbors_tree.py @@ -1,30 +1,23 @@ # License: BSD 3 clause -import pickle import itertools +import pickle import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal from sklearn.metrics import DistanceMetric -from sklearn.neighbors._ball_tree import ( - BallTree, - kernel_norm, - DTYPE, - ITYPE, - NeighborsHeap as NeighborsHeapBT, - simultaneous_sort as simultaneous_sort_bt, - nodeheap_sort as nodeheap_sort_bt, -) -from sklearn.neighbors._kd_tree import ( - KDTree, - NeighborsHeap as NeighborsHeapKDT, - simultaneous_sort as simultaneous_sort_kdt, - nodeheap_sort as nodeheap_sort_kdt, -) - +from sklearn.neighbors._ball_tree import DTYPE, ITYPE, BallTree +from sklearn.neighbors._ball_tree import NeighborsHeap as NeighborsHeapBT +from sklearn.neighbors._ball_tree import kernel_norm +from sklearn.neighbors._ball_tree import nodeheap_sort as nodeheap_sort_bt +from sklearn.neighbors._ball_tree import simultaneous_sort as simultaneous_sort_bt +from sklearn.neighbors._kd_tree import KDTree +from sklearn.neighbors._kd_tree import NeighborsHeap as NeighborsHeapKDT +from sklearn.neighbors._kd_tree import nodeheap_sort as nodeheap_sort_kdt +from sklearn.neighbors._kd_tree import simultaneous_sort as simultaneous_sort_kdt from sklearn.utils import check_random_state -from numpy.testing import assert_array_almost_equal, assert_allclose rng = np.random.RandomState(42) V_mahalanobis = rng.rand(3, 3) diff --git a/sklearn/neighbors/tests/test_quad_tree.py b/sklearn/neighbors/tests/test_quad_tree.py index bba79e2c8ee1a..be9a4c5fe549d 100644 --- a/sklearn/neighbors/tests/test_quad_tree.py +++ b/sklearn/neighbors/tests/test_quad_tree.py @@ -1,6 +1,6 @@ import pickle -import numpy as np +import numpy as np import pytest from sklearn.neighbors._quad_tree import _QuadTree diff --git a/sklearn/neural_network/__init__.py b/sklearn/neural_network/__init__.py index 7f6bad7bbd7e7..0b321b605de0b 100644 --- a/sklearn/neural_network/__init__.py +++ b/sklearn/neural_network/__init__.py @@ -5,9 +5,7 @@ # License: BSD 3 clause +from ._multilayer_perceptron import MLPClassifier, MLPRegressor from ._rbm import BernoulliRBM -from ._multilayer_perceptron import MLPClassifier -from ._multilayer_perceptron import MLPRegressor - __all__ = ["BernoulliRBM", "MLPClassifier", "MLPRegressor"] diff --git a/sklearn/neural_network/_base.py b/sklearn/neural_network/_base.py index 0e40739556e18..73d62f9543e98 100644 --- a/sklearn/neural_network/_base.py +++ b/sklearn/neural_network/_base.py @@ -5,7 +5,6 
@@ # License: BSD 3 clause import numpy as np - from scipy.special import expit as logistic_sigmoid from scipy.special import xlogy diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index 5fb4d7c64ffee..0fb3b821503ab 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -6,36 +6,35 @@ # Jiyuan Qian # License: BSD 3 clause -import numpy as np - -from abc import ABCMeta, abstractmethod import warnings +from abc import ABCMeta, abstractmethod from itertools import chain +import numpy as np import scipy.optimize -from ..base import ( - BaseEstimator, - ClassifierMixin, - RegressorMixin, -) -from ..base import is_classifier -from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS -from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer +from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, is_classifier +from ..exceptions import ConvergenceWarning from ..model_selection import train_test_split from ..preprocessing import LabelBinarizer -from ..utils import gen_batches, check_random_state -from ..utils import shuffle -from ..utils import _safe_indexing -from ..utils import column_or_1d -from ..exceptions import ConvergenceWarning +from ..utils import ( + _safe_indexing, + check_random_state, + column_or_1d, + gen_batches, + shuffle, +) from ..utils.extmath import safe_sparse_dot -from ..utils.validation import check_is_fitted -from ..utils.multiclass import _check_partial_fit_first_call, unique_labels -from ..utils.multiclass import type_of_target -from ..utils.optimize import _check_optimize_result from ..utils.metaestimators import available_if - +from ..utils.multiclass import ( + _check_partial_fit_first_call, + type_of_target, + unique_labels, +) +from ..utils.optimize import _check_optimize_result +from ..utils.validation import check_is_fitted +from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS +from ._stochastic_optimizers import AdamOptimizer, SGDOptimizer _STOCHASTIC_SOLVERS = ["sgd", "adam"] diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index aac92c3108787..24f142f47c418 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -13,13 +13,9 @@ import scipy.sparse as sp from scipy.special import expit # logistic function -from ..base import BaseEstimator -from ..base import TransformerMixin -from ..base import _ClassNamePrefixFeaturesOutMixin -from ..utils import check_random_state -from ..utils import gen_even_slices -from ..utils.extmath import safe_sparse_dot -from ..utils.extmath import log_logistic +from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin +from ..utils import check_random_state, gen_even_slices +from ..utils.extmath import log_logistic, safe_sparse_dot from ..utils.validation import check_is_fitted diff --git a/sklearn/neural_network/tests/test_base.py b/sklearn/neural_network/tests/test_base.py index 32aa7f1fee917..af7b38e899907 100644 --- a/sklearn/neural_network/tests/test_base.py +++ b/sklearn/neural_network/tests/test_base.py @@ -1,8 +1,7 @@ -import pytest import numpy as np +import pytest -from sklearn.neural_network._base import binary_log_loss -from sklearn.neural_network._base import log_loss +from sklearn.neural_network._base import binary_log_loss, log_loss def test_binary_log_loss_1_prob_finite(): diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 
999983d751cc1..78f285f6020ea 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -5,33 +5,29 @@ # Author: Issam H. Laradji # License: BSD 3 clause -import pytest +import re import sys import warnings -import re +from io import StringIO import numpy as np -import joblib +import pytest +from numpy.testing import assert_allclose, assert_almost_equal, assert_array_equal +from scipy.sparse import csr_matrix -from numpy.testing import ( - assert_almost_equal, - assert_array_equal, - assert_allclose, +import joblib +from sklearn.datasets import ( + load_digits, + load_iris, + make_multilabel_classification, + make_regression, ) - -from sklearn.datasets import load_digits, load_iris -from sklearn.datasets import make_regression, make_multilabel_classification from sklearn.exceptions import ConvergenceWarning -from io import StringIO from sklearn.metrics import roc_auc_score -from sklearn.neural_network import MLPClassifier -from sklearn.neural_network import MLPRegressor -from sklearn.preprocessing import LabelBinarizer -from sklearn.preprocessing import MinMaxScaler, scale -from scipy.sparse import csr_matrix +from sklearn.neural_network import MLPClassifier, MLPRegressor +from sklearn.preprocessing import LabelBinarizer, MinMaxScaler, scale from sklearn.utils._testing import ignore_warnings - ACTIVATION_TYPES = ["identity", "logistic", "tanh", "relu"] X_digits, y_digits = load_digits(n_class=3, return_X_y=True) diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py index d36fa6b0bd11f..e46723628ac2c 100644 --- a/sklearn/neural_network/tests/test_rbm.py +++ b/sklearn/neural_network/tests/test_rbm.py @@ -1,18 +1,18 @@ -import sys import re -import pytest +import sys +from io import StringIO import numpy as np +import pytest from scipy.sparse import csc_matrix, csr_matrix, lil_matrix + +from sklearn.datasets import load_digits +from sklearn.neural_network import BernoulliRBM from sklearn.utils._testing import ( + assert_allclose, assert_almost_equal, assert_array_equal, - assert_allclose, ) - -from sklearn.datasets import load_digits -from io import StringIO -from sklearn.neural_network import BernoulliRBM from sklearn.utils.validation import assert_all_finite Xdigits, _ = load_digits(return_X_y=True) diff --git a/sklearn/neural_network/tests/test_stochastic_optimizers.py b/sklearn/neural_network/tests/test_stochastic_optimizers.py index e876892f28daf..58a9f0c7dda13 100644 --- a/sklearn/neural_network/tests/test_stochastic_optimizers.py +++ b/sklearn/neural_network/tests/test_stochastic_optimizers.py @@ -1,13 +1,12 @@ import numpy as np from sklearn.neural_network._stochastic_optimizers import ( + AdamOptimizer, BaseOptimizer, SGDOptimizer, - AdamOptimizer, ) from sklearn.utils._testing import assert_array_equal - shapes = [(4, 6), (6, 8), (7, 8, 9)] diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 74347f250bc83..499c46be72fa9 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -14,24 +14,19 @@ import numpy as np from scipy import sparse + from joblib import Parallel -from .base import clone, TransformerMixin +from .base import TransformerMixin, clone +from .exceptions import NotFittedError from .preprocessing import FunctionTransformer +from .utils import Bunch, _print_elapsed_time from .utils._estimator_html_repr import _VisualBlock -from .utils.metaestimators import available_if -from .utils import ( - Bunch, - _print_elapsed_time, -) -from .utils.deprecation import deprecated from 
.utils._tags import _safe_tags -from .utils.validation import check_memory -from .utils.validation import check_is_fitted +from .utils.deprecation import deprecated from .utils.fixes import delayed -from .exceptions import NotFittedError - -from .utils.metaestimators import _BaseComposition +from .utils.metaestimators import _BaseComposition, available_if +from .utils.validation import check_is_fitted, check_memory __all__ = ["Pipeline", "FeatureUnion", "make_pipeline", "make_union"] diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py index ccea91545a467..faafcc6b9f08a 100644 --- a/sklearn/preprocessing/__init__.py +++ b/sklearn/preprocessing/__init__.py @@ -3,40 +3,31 @@ normalization, binarization methods. """ -from ._function_transformer import FunctionTransformer - -from ._data import Binarizer -from ._data import KernelCenterer -from ._data import MinMaxScaler -from ._data import MaxAbsScaler -from ._data import Normalizer -from ._data import RobustScaler -from ._data import StandardScaler -from ._data import QuantileTransformer -from ._data import add_dummy_feature -from ._data import binarize -from ._data import normalize -from ._data import scale -from ._data import robust_scale -from ._data import maxabs_scale -from ._data import minmax_scale -from ._data import quantile_transform -from ._data import power_transform -from ._data import PowerTransformer - -from ._encoders import OneHotEncoder -from ._encoders import OrdinalEncoder - -from ._label import label_binarize -from ._label import LabelBinarizer -from ._label import LabelEncoder -from ._label import MultiLabelBinarizer - +from ._data import ( + Binarizer, + KernelCenterer, + MaxAbsScaler, + MinMaxScaler, + Normalizer, + PowerTransformer, + QuantileTransformer, + RobustScaler, + StandardScaler, + add_dummy_feature, + binarize, + maxabs_scale, + minmax_scale, + normalize, + power_transform, + quantile_transform, + robust_scale, + scale, +) from ._discretization import KBinsDiscretizer - -from ._polynomial import PolynomialFeatures -from ._polynomial import SplineTransformer - +from ._encoders import OneHotEncoder, OrdinalEncoder +from ._function_transformer import FunctionTransformer +from ._label import LabelBinarizer, LabelEncoder, MultiLabelBinarizer, label_binarize +from ._polynomial import PolynomialFeatures, SplineTransformer __all__ = [ "Binarizer", diff --git a/sklearn/preprocessing/_csr_polynomial_expansion.pyx b/sklearn/preprocessing/_csr_polynomial_expansion.pyx index ef958b12266e1..9bf55452a34a5 100644 --- a/sklearn/preprocessing/_csr_polynomial_expansion.pyx +++ b/sklearn/preprocessing/_csr_polynomial_expansion.pyx @@ -1,8 +1,9 @@ # Author: Andrew nystrom from scipy.sparse import csr_matrix -from numpy cimport ndarray + cimport numpy as np +from numpy cimport ndarray np.import_array() ctypedef np.int32_t INDEX_T diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index f0088aab521ad..6f4607544a2cb 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -11,39 +11,35 @@ import warnings import numpy as np -from scipy import sparse -from scipy import stats -from scipy import optimize +from scipy import optimize, sparse, stats from scipy.special import boxcox from ..base import ( BaseEstimator, TransformerMixin, - _OneToOneFeatureMixin, _ClassNamePrefixFeaturesOutMixin, + _OneToOneFeatureMixin, ) from ..utils import check_array from ..utils.extmath import _incremental_mean_and_var, row_norms -from ..utils.sparsefuncs_fast import ( - 
inplace_csr_row_normalize_l1, - inplace_csr_row_normalize_l2, -) from ..utils.sparsefuncs import ( + incr_mean_variance_axis, inplace_column_scale, mean_variance_axis, - incr_mean_variance_axis, min_max_axis, ) +from ..utils.sparsefuncs_fast import ( + inplace_csr_row_normalize_l1, + inplace_csr_row_normalize_l2, +) from ..utils.validation import ( + FLOAT_DTYPES, + _check_sample_weight, check_is_fitted, check_random_state, - _check_sample_weight, - FLOAT_DTYPES, ) - from ._encoders import OneHotEncoder - BOUNDS_THRESHOLD = 1e-7 __all__ = [ diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index 478051c52c630..50d6ebbe948c5 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -5,18 +5,20 @@ import numbers -import numpy as np import warnings -from . import OneHotEncoder +import numpy as np from ..base import BaseEstimator, TransformerMixin -from ..utils.validation import check_array -from ..utils.validation import check_is_fitted -from ..utils.validation import check_random_state -from ..utils.validation import _check_feature_names_in -from ..utils.validation import check_scalar from ..utils import _safe_indexing +from ..utils.validation import ( + _check_feature_names_in, + check_array, + check_is_fitted, + check_random_state, + check_scalar, +) +from ._encoders import OneHotEncoder class KBinsDiscretizer(TransformerMixin, BaseEstimator): diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index d4cc642a18562..b8cf86f1aaf76 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -10,13 +10,10 @@ from ..base import BaseEstimator, TransformerMixin, _OneToOneFeatureMixin from ..utils import check_array, is_scalar_nan -from ..utils.deprecation import deprecated -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_feature_names_in +from ..utils._encode import _check_unknown, _encode, _get_counts, _unique from ..utils._mask import _get_mask - -from ..utils._encode import _encode, _check_unknown, _unique, _get_counts - +from ..utils.deprecation import deprecated +from ..utils.validation import _check_feature_names_in, check_is_fitted __all__ = ["OneHotEncoder", "OrdinalEncoder"] diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index e7f4a5e337208..58ee406675779 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -6,23 +6,20 @@ # Hamzeh Alsalhi # License: BSD 3 clause -from collections import defaultdict -import itertools import array +import itertools import warnings +from collections import defaultdict import numpy as np import scipy.sparse as sp from ..base import BaseEstimator, TransformerMixin - -from ..utils.sparsefuncs import min_max_axis from ..utils import column_or_1d -from ..utils.validation import _num_samples, check_array, check_is_fitted -from ..utils.multiclass import unique_labels -from ..utils.multiclass import type_of_target from ..utils._encode import _encode, _unique - +from ..utils.multiclass import type_of_target, unique_labels +from ..utils.sparsefuncs import min_max_axis +from ..utils.validation import _num_samples, check_array, check_is_fitted __all__ = [ "label_binarize", diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index 09306cc56ab11..15ecf8ba0016e 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -14,13 +14,15 @@ from 
..base import BaseEstimator, TransformerMixin from ..utils import check_array from ..utils.deprecation import deprecated -from ..utils.validation import check_is_fitted, FLOAT_DTYPES, _check_sample_weight -from ..utils.validation import _check_feature_names_in from ..utils.stats import _weighted_percentile - +from ..utils.validation import ( + FLOAT_DTYPES, + _check_feature_names_in, + _check_sample_weight, + check_is_fitted, +) from ._csr_polynomial_expansion import _csr_polynomial_expansion - __all__ = [ "PolynomialFeatures", "SplineTransformer", diff --git a/sklearn/preprocessing/tests/test_common.py b/sklearn/preprocessing/tests/test_common.py index 98b8dcdfe0e2a..9ebef6c000050 100644 --- a/sklearn/preprocessing/tests/test_common.py +++ b/sklearn/preprocessing/tests/test_common.py @@ -1,31 +1,27 @@ import warnings -import pytest import numpy as np - +import pytest from scipy import sparse +from sklearn.base import clone from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split - -from sklearn.base import clone - -from sklearn.preprocessing import maxabs_scale -from sklearn.preprocessing import minmax_scale -from sklearn.preprocessing import scale -from sklearn.preprocessing import power_transform -from sklearn.preprocessing import quantile_transform -from sklearn.preprocessing import robust_scale - -from sklearn.preprocessing import MaxAbsScaler -from sklearn.preprocessing import MinMaxScaler -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import PowerTransformer -from sklearn.preprocessing import QuantileTransformer -from sklearn.preprocessing import RobustScaler - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose +from sklearn.preprocessing import ( + MaxAbsScaler, + MinMaxScaler, + PowerTransformer, + QuantileTransformer, + RobustScaler, + StandardScaler, + maxabs_scale, + minmax_scale, + power_transform, + quantile_transform, + robust_scale, + scale, +) +from sklearn.utils._testing import assert_allclose, assert_array_equal iris = load_iris() diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index aab28fce336a4..894700168bf49 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -4,59 +4,54 @@ # # License: BSD 3 clause -import warnings import itertools - import re +import warnings + import numpy as np import numpy.linalg as la -from scipy import sparse, stats - import pytest +from scipy import sparse, stats -from sklearn.utils import gen_batches - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import skip_if_32bit -from sklearn.utils._testing import _convert_container - -from sklearn.utils.sparsefuncs import mean_variance_axis -from sklearn.preprocessing import Binarizer -from sklearn.preprocessing import KernelCenterer -from sklearn.preprocessing import Normalizer -from sklearn.preprocessing import normalize -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import scale -from sklearn.preprocessing import MinMaxScaler -from sklearn.preprocessing import minmax_scale -from sklearn.preprocessing import QuantileTransformer 
-from sklearn.preprocessing import quantile_transform -from sklearn.preprocessing import MaxAbsScaler -from sklearn.preprocessing import maxabs_scale -from sklearn.preprocessing import RobustScaler -from sklearn.preprocessing import robust_scale -from sklearn.preprocessing import add_dummy_feature -from sklearn.preprocessing import PowerTransformer -from sklearn.preprocessing import power_transform -from sklearn.preprocessing._data import _handle_zeros_in_scale -from sklearn.preprocessing._data import BOUNDS_THRESHOLD -from sklearn.metrics.pairwise import linear_kernel - -from sklearn.exceptions import NotFittedError - +from sklearn import datasets from sklearn.base import clone -from sklearn.pipeline import Pipeline +from sklearn.exceptions import NotFittedError +from sklearn.metrics.pairwise import linear_kernel from sklearn.model_selection import cross_val_predict +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import ( + Binarizer, + KernelCenterer, + MaxAbsScaler, + MinMaxScaler, + Normalizer, + PowerTransformer, + QuantileTransformer, + RobustScaler, + StandardScaler, + add_dummy_feature, + maxabs_scale, + minmax_scale, + normalize, + power_transform, + quantile_transform, + robust_scale, + scale, +) +from sklearn.preprocessing._data import BOUNDS_THRESHOLD, _handle_zeros_in_scale from sklearn.svm import SVR -from sklearn.utils import shuffle - -from sklearn import datasets - +from sklearn.utils import gen_batches, shuffle +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + assert_array_less, + skip_if_32bit, +) +from sklearn.utils.sparsefuncs import mean_variance_axis iris = datasets.load_iris() diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py index e1317acb97808..cc37af1873300 100644 --- a/sklearn/preprocessing/tests/test_discretization.py +++ b/sklearn/preprocessing/tests/test_discretization.py @@ -1,15 +1,15 @@ -import pytest +import warnings + import numpy as np +import pytest import scipy.sparse as sp -import warnings from sklearn import clone -from sklearn.preprocessing import KBinsDiscretizer -from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder from sklearn.utils._testing import ( + assert_allclose_dense_sparse, assert_array_almost_equal, assert_array_equal, - assert_allclose_dense_sparse, ) X = [[-2, 1.5, -4, -1], [-1, 2.5, -3, -0.5], [0, 3.5, -2, 0.5], [1, 4.5, -1, 2]] diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py index ea32de22cd2f0..8679bd07ae6f0 100644 --- a/sklearn/preprocessing/tests/test_encoders.py +++ b/sklearn/preprocessing/tests/test_encoders.py @@ -1,17 +1,17 @@ import re import numpy as np -from scipy import sparse import pytest +from scipy import sparse from sklearn.exceptions import NotFittedError -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import _convert_container +from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder from sklearn.utils import is_scalar_nan - -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import OrdinalEncoder +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_array_equal, +) def test_one_hot_encoder_sparse_dense(): diff --git 
a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py index 98b8d75da38b4..6fce8d48fa471 100644 --- a/sklearn/preprocessing/tests/test_function_transformer.py +++ b/sklearn/preprocessing/tests/test_function_transformer.py @@ -1,15 +1,15 @@ import warnings -import pytest import numpy as np +import pytest from scipy import sparse -from sklearn.utils import _safe_indexing from sklearn.preprocessing import FunctionTransformer +from sklearn.utils import _safe_indexing from sklearn.utils._testing import ( - assert_array_equal, - assert_allclose_dense_sparse, _convert_container, + assert_allclose_dense_sparse, + assert_array_equal, ) diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index a59cd9b152d27..90efe990eee58 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -1,29 +1,26 @@ import numpy as np - import pytest - -from scipy.sparse import issparse -from scipy.sparse import coo_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix - -from sklearn.utils.multiclass import type_of_target - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import _to_object_array - -from sklearn.preprocessing._label import LabelBinarizer -from sklearn.preprocessing._label import MultiLabelBinarizer -from sklearn.preprocessing._label import LabelEncoder -from sklearn.preprocessing._label import label_binarize - -from sklearn.preprocessing._label import _inverse_binarize_thresholding -from sklearn.preprocessing._label import _inverse_binarize_multiclass +from scipy.sparse import ( + coo_matrix, + csc_matrix, + csr_matrix, + dok_matrix, + issparse, + lil_matrix, +) from sklearn import datasets +from sklearn.preprocessing._label import ( + LabelBinarizer, + LabelEncoder, + MultiLabelBinarizer, + _inverse_binarize_multiclass, + _inverse_binarize_thresholding, + label_binarize, +) +from sklearn.utils import _to_object_array +from sklearn.utils._testing import assert_array_equal, ignore_warnings +from sklearn.utils.multiclass import type_of_target iris = datasets.load_iris() diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py index 2129247125d6c..7504cd3bf23c2 100644 --- a/sklearn/preprocessing/tests/test_polynomial.py +++ b/sklearn/preprocessing/tests/test_polynomial.py @@ -1,11 +1,10 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal from scipy import sparse +from scipy.interpolate import BSpline from scipy.sparse import random as sparse_random -from sklearn.utils._testing import assert_array_almost_equal -from numpy.testing import assert_allclose, assert_array_equal -from scipy.interpolate import BSpline from sklearn.linear_model import LinearRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import ( @@ -13,6 +12,7 @@ PolynomialFeatures, SplineTransformer, ) +from sklearn.utils._testing import assert_array_almost_equal @pytest.mark.parametrize("est", (PolynomialFeatures, SplineTransformer)) diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 3b4a5e2236db5..7f1a17323e0ee 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -30,17 +30,15 @@ from abc import ABCMeta, abstractmethod import numpy as np 
-from scipy import linalg import scipy.sparse as sp +from scipy import linalg -from .base import BaseEstimator, TransformerMixin -from .base import _ClassNamePrefixFeaturesOutMixin - +from .base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin +from .exceptions import DataDimensionalityWarning from .utils import check_random_state from .utils.extmath import safe_sparse_dot from .utils.random import sample_without_replacement from .utils.validation import check_array, check_is_fitted -from .exceptions import DataDimensionalityWarning __all__ = [ "SparseRandomProjection", diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 1e3684797ebe6..04a82bd10e27a 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -52,23 +52,24 @@ Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005 """ +import warnings + # Authors: Clay Woolam # Utkarsh Upadhyay # License: BSD from abc import ABCMeta, abstractmethod -import warnings import numpy as np from scipy import sparse from scipy.sparse import csgraph from ..base import BaseEstimator, ClassifierMixin +from ..exceptions import ConvergenceWarning from ..metrics.pairwise import rbf_kernel from ..neighbors import NearestNeighbors from ..utils.extmath import safe_sparse_dot from ..utils.multiclass import check_classification_targets from ..utils.validation import check_is_fitted -from ..exceptions import ConvergenceWarning class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta): diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index 9b21cd273bfb9..5214d8f86a317 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -2,10 +2,10 @@ import numpy as np -from ..base import MetaEstimatorMixin, clone, BaseEstimator -from ..utils.validation import check_is_fitted -from ..utils.metaestimators import available_if +from ..base import BaseEstimator, MetaEstimatorMixin, clone from ..utils import safe_mask +from ..utils.metaestimators import available_if +from ..utils.validation import check_is_fitted __all__ = ["SelfTrainingClassifier"] diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 7fbca2f83522b..3a862da40a0c7 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -1,18 +1,18 @@ """ test the label propagation module """ -import numpy as np -import pytest import warnings +import numpy as np +import pytest +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy.sparse import issparse -from sklearn.semi_supervised import _label_propagation as label_propagation + +from sklearn.datasets import make_classification +from sklearn.exceptions import ConvergenceWarning from sklearn.metrics.pairwise import rbf_kernel from sklearn.model_selection import train_test_split from sklearn.neighbors import NearestNeighbors -from sklearn.datasets import make_classification -from sklearn.exceptions import ConvergenceWarning -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal +from sklearn.semi_supervised import _label_propagation as label_propagation ESTIMATORS = [ (label_propagation.LabelPropagation, {"kernel": "rbf"}), diff --git 
a/sklearn/semi_supervised/tests/test_self_training.py b/sklearn/semi_supervised/tests/test_self_training.py index 49198927aa8d9..2b1228640eb74 100644 --- a/sklearn/semi_supervised/tests/test_self_training.py +++ b/sklearn/semi_supervised/tests/test_self_training.py @@ -1,18 +1,17 @@ from math import ceil import numpy as np -from numpy.testing import assert_array_equal import pytest +from numpy.testing import assert_array_equal +from sklearn.datasets import load_iris, make_blobs from sklearn.ensemble import StackingClassifier from sklearn.exceptions import NotFittedError -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC -from sklearn.model_selection import train_test_split -from sklearn.datasets import load_iris, make_blobs from sklearn.metrics import accuracy_score - +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier from sklearn.semi_supervised import SelfTrainingClassifier +from sklearn.svm import SVC # Author: Oliver Rausch # License: BSD 3 clause diff --git a/sklearn/setup.py b/sklearn/setup.py index 874bdbbcbed43..6e1e18180180c 100644 --- a/sklearn/setup.py +++ b/sklearn/setup.py @@ -1,12 +1,12 @@ -import sys import os +import sys from sklearn._build_utils import cythonize_extensions def configuration(parent_package="", top_path=None): - from numpy.distutils.misc_util import Configuration import numpy + from numpy.distutils.misc_util import Configuration libraries = [] if os.name == "posix": diff --git a/sklearn/svm/__init__.py b/sklearn/svm/__init__.py index f5b4123230f93..0d64ce24cdd63 100644 --- a/sklearn/svm/__init__.py +++ b/sklearn/svm/__init__.py @@ -10,8 +10,8 @@ # of their respective owners. # License: BSD 3 clause (C) INRIA 2010 -from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR from ._bounds import l1_min_c +from ._classes import SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM __all__ = [ "LinearSVC", diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index 42b53409fa8b8..ebc4edcaa56bc 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -1,30 +1,30 @@ -import warnings import numbers +import warnings from abc import ABCMeta, abstractmethod import numpy as np import scipy.sparse as sp -# mypy error: error: Module 'sklearn.svm' has no attribute '_libsvm' -# (and same for other imports) -from . import _libsvm as libsvm # type: ignore -from . import _liblinear as liblinear # type: ignore -from . 
import _libsvm_sparse as libsvm_sparse # type: ignore from ..base import BaseEstimator, ClassifierMixin +from ..exceptions import ConvergenceWarning, NotFittedError from ..preprocessing import LabelEncoder -from ..utils.multiclass import _ovr_decision_function -from ..utils import check_array, check_random_state -from ..utils import column_or_1d -from ..utils import compute_class_weight -from ..utils.metaestimators import available_if +from ..utils import check_array, check_random_state, column_or_1d, compute_class_weight from ..utils.extmath import safe_sparse_dot -from ..utils.validation import check_is_fitted, _check_large_sparse -from ..utils.validation import _num_samples -from ..utils.validation import _check_sample_weight, check_consistent_length -from ..utils.multiclass import check_classification_targets -from ..exceptions import ConvergenceWarning -from ..exceptions import NotFittedError +from ..utils.metaestimators import available_if +from ..utils.multiclass import _ovr_decision_function, check_classification_targets +from ..utils.validation import ( + _check_large_sparse, + _check_sample_weight, + _num_samples, + check_consistent_length, + check_is_fitted, +) +# mypy error: error: Module 'sklearn.svm' has no attribute '_libsvm' +# (and same for other imports) +from . import _liblinear as liblinear # type: ignore +from . import _libsvm as libsvm # type: ignore +from . import _libsvm_sparse as libsvm_sparse # type: ignore LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr"] diff --git a/sklearn/svm/_bounds.py b/sklearn/svm/_bounds.py index 86e973973ca5a..6806793f8a52c 100644 --- a/sklearn/svm/_bounds.py +++ b/sklearn/svm/_bounds.py @@ -5,8 +5,8 @@ import numpy as np from ..preprocessing import LabelBinarizer -from ..utils.validation import check_consistent_length, check_array from ..utils.extmath import safe_sparse_dot +from ..utils.validation import check_array, check_consistent_length def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0): diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 3cfbafce876ea..484f222b66db3 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1,11 +1,12 @@ -import numpy as np import warnings -from ._base import _fit_liblinear, BaseSVC, BaseLibSVM -from ..base import BaseEstimator, RegressorMixin, OutlierMixin -from ..linear_model._base import LinearClassifierMixin, SparseCoefMixin, LinearModel -from ..utils.validation import _num_samples +import numpy as np + +from ..base import BaseEstimator, OutlierMixin, RegressorMixin +from ..linear_model._base import LinearClassifierMixin, LinearModel, SparseCoefMixin from ..utils.multiclass import check_classification_targets +from ..utils.validation import _num_samples +from ._base import BaseLibSVM, BaseSVC, _fit_liblinear class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): diff --git a/sklearn/svm/_liblinear.pyx b/sklearn/svm/_liblinear.pyx index 9dd15e0716c7f..9057f42ce97f2 100644 --- a/sklearn/svm/_liblinear.pyx +++ b/sklearn/svm/_liblinear.pyx @@ -4,10 +4,11 @@ Wrapper for liblinear Author: fabian.pedregosa@inria.fr """ -import numpy as np +import numpy as np + cimport numpy as np -from ..utils._cython_blas cimport _dot, _axpy, _scal, _nrm2 +from ..utils._cython_blas cimport _axpy, _dot, _nrm2, _scal include "_liblinear.pxi" @@ -50,7 +51,7 @@ def train_wrap(X, np.ndarray[np.float64_t, ndim=1, mode='c'] Y, free_problem(problem) free_parameter(param) raise ValueError(error_msg) - + cdef BlasFunctions 
blas_functions blas_functions.dot = _dot[double] blas_functions.axpy = _axpy[double] diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx index 4df99724b790a..76472005c19ad 100644 --- a/sklearn/svm/_libsvm.pyx +++ b/sklearn/svm/_libsvm.pyx @@ -28,9 +28,12 @@ Authors """ import warnings -import numpy as np + +import numpy as np + cimport numpy as np from libc.stdlib cimport free + from ..utils._cython_blas cimport _dot include "_libsvm.pxi" diff --git a/sklearn/svm/_libsvm_sparse.pyx b/sklearn/svm/_libsvm_sparse.pyx index 64fc69364b2ee..fb374eb4a32cd 100644 --- a/sklearn/svm/_libsvm_sparse.pyx +++ b/sklearn/svm/_libsvm_sparse.pyx @@ -1,9 +1,15 @@ import warnings -import numpy as np + +import numpy as np + cimport numpy as np + from scipy import sparse + from ..exceptions import ConvergenceWarning + from ..utils._cython_blas cimport _dot + np.import_array() cdef extern from *: diff --git a/sklearn/svm/setup.py b/sklearn/svm/setup.py index d5f94d8a11181..c401864d39502 100644 --- a/sklearn/svm/setup.py +++ b/sklearn/svm/setup.py @@ -1,5 +1,6 @@ import os from os.path import join + import numpy diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py index 043c86dec86e4..f3c91906a25d1 100644 --- a/sklearn/svm/tests/test_bounds.py +++ b/sklearn/svm/tests/test_bounds.py @@ -1,14 +1,12 @@ import numpy as np +import pytest from scipy import sparse as sp from scipy import stats -import pytest - -from sklearn.svm._bounds import l1_min_c -from sklearn.svm import LinearSVC from sklearn.linear_model import LogisticRegression -from sklearn.svm._newrand import set_seed_wrap, bounded_rand_int_wrap - +from sklearn.svm import LinearSVC +from sklearn.svm._bounds import l1_min_c +from sklearn.svm._newrand import bounded_rand_int_wrap, set_seed_wrap dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]] sparse_X = sp.csr_matrix(dense_X) diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 3bb6d0f268d07..b45fe60c2948b 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -1,16 +1,14 @@ -import pytest - import numpy as np +import pytest from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy import sparse -from sklearn import datasets, svm, linear_model, base -from sklearn.datasets import make_classification, load_digits, make_blobs -from sklearn.svm.tests import test_svm +from sklearn import base, datasets, linear_model, svm +from sklearn.datasets import load_digits, make_blobs, make_classification from sklearn.exceptions import ConvergenceWarning -from sklearn.utils.extmath import safe_sparse_dot +from sklearn.svm.tests import test_svm from sklearn.utils._testing import ignore_warnings, skip_if_32bit - +from sklearn.utils.extmath import safe_sparse_dot # test sample 1 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index db1d49ab4bcf9..cf1587615c5e2 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -3,32 +3,37 @@ TODO: remove hard coded numerical results when possible """ -import numpy as np import itertools -import pytest import re -from numpy.testing import assert_array_equal, assert_array_almost_equal -from numpy.testing import assert_almost_equal -from numpy.testing import assert_allclose +import numpy as np +import pytest +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) from scipy import 
sparse -from sklearn import svm, linear_model, datasets, metrics, base -from sklearn.svm import LinearSVC -from sklearn.svm import LinearSVR -from sklearn.model_selection import train_test_split -from sklearn.datasets import make_classification, make_blobs + +from sklearn import base, datasets, linear_model, metrics, svm +from sklearn.datasets import make_blobs, make_classification +from sklearn.exceptions import ( + ConvergenceWarning, + NotFittedError, + UndefinedMetricWarning, +) from sklearn.metrics import f1_score from sklearn.metrics.pairwise import rbf_kernel -from sklearn.utils import check_random_state -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.validation import _num_samples -from sklearn.utils import shuffle -from sklearn.exceptions import ConvergenceWarning -from sklearn.exceptions import NotFittedError, UndefinedMetricWarning +from sklearn.model_selection import train_test_split from sklearn.multiclass import OneVsRestClassifier # mypy error: Module 'sklearn.svm' has no attribute '_libsvm' from sklearn.svm import _libsvm # type: ignore +from sklearn.svm import LinearSVC, LinearSVR +from sklearn.utils import check_random_state, shuffle +from sklearn.utils._testing import ignore_warnings +from sklearn.utils.validation import _num_samples # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/tests/random_seed.py b/sklearn/tests/random_seed.py index f282f8002f2c5..a114749e33494 100644 --- a/sklearn/tests/random_seed.py +++ b/sklearn/tests/random_seed.py @@ -8,10 +8,11 @@ https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed """ -import pytest from os import environ from random import Random +import pytest + # Passes the main worker's random seeds to workers class XDistHooks: diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index bdbe55c463841..d17ccd61ac722 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -1,30 +1,27 @@ # Author: Gael Varoquaux # License: BSD 3 clause +import pickle import re +import warnings + import numpy as np -import scipy.sparse as sp import pytest -import warnings +import scipy.sparse as sp import sklearn -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_no_warnings -from sklearn.utils._testing import ignore_warnings - -from sklearn.base import BaseEstimator, clone, is_classifier -from sklearn.svm import SVC -from sklearn.pipeline import Pipeline +from sklearn import config_context, datasets +from sklearn.base import BaseEstimator, TransformerMixin, clone, is_classifier from sklearn.model_selection import GridSearchCV - -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree import DecisionTreeRegressor -from sklearn import datasets - -from sklearn.base import TransformerMixin +from sklearn.pipeline import Pipeline +from sklearn.svm import SVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils._mocking import MockDataFrame -from sklearn import config_context -import pickle +from sklearn.utils._testing import ( + assert_array_equal, + assert_no_warnings, + ignore_warnings, +) ############################################################################# @@ -653,9 +650,9 @@ def transform(self, X): "Feature names only support names that are all strings. 
" "Got feature names with dtypes: ['int', 'str']" ) - with pytest.warns(FutureWarning, match=msg) as record: + with pytest.warns(FutureWarning, match=msg): trans.fit(df_mixed) # transform on feature names that are mixed also warns: - with pytest.warns(FutureWarning, match=msg) as record: + with pytest.warns(FutureWarning, match=msg): trans.transform(df_mixed) diff --git a/sklearn/tests/test_build.py b/sklearn/tests/test_build.py index d6affa5e4cc78..40a960cba6283 100644 --- a/sklearn/tests/test_build.py +++ b/sklearn/tests/test_build.py @@ -1,7 +1,8 @@ import os -import pytest import textwrap +import pytest + from sklearn import __version__ from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index fb8a6d4f344b2..603834765b19f 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -1,51 +1,52 @@ # Authors: Alexandre Gramfort # License: BSD 3 clause -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from scipy import sparse from sklearn.base import BaseEstimator, clone -from sklearn.dummy import DummyClassifier -from sklearn.model_selection import LeaveOneOut, train_test_split - -from sklearn.utils._testing import ( - assert_array_almost_equal, - assert_almost_equal, - assert_array_equal, +from sklearn.calibration import ( + CalibratedClassifierCV, + CalibrationDisplay, + _CalibratedClassifier, + _sigmoid_calibration, + _SigmoidCalibration, + calibration_curve, ) -from sklearn.utils.extmath import softmax -from sklearn.exceptions import NotFittedError -from sklearn.datasets import make_classification, make_blobs, load_iris -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import KFold, cross_val_predict -from sklearn.naive_bayes import MultinomialNB +from sklearn.datasets import load_iris, make_blobs, make_classification +from sklearn.dummy import DummyClassifier from sklearn.ensemble import ( RandomForestClassifier, RandomForestRegressor, VotingClassifier, ) -from sklearn.linear_model import LogisticRegression, LinearRegression -from sklearn.tree import DecisionTreeClassifier -from sklearn.svm import LinearSVC -from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.preprocessing import StandardScaler -from sklearn.isotonic import IsotonicRegression +from sklearn.exceptions import NotFittedError from sklearn.feature_extraction import DictVectorizer from sklearn.impute import SimpleImputer +from sklearn.isotonic import IsotonicRegression +from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.metrics import brier_score_loss -from sklearn.calibration import ( - _CalibratedClassifier, - _SigmoidCalibration, - _sigmoid_calibration, - CalibratedClassifierCV, - CalibrationDisplay, - calibration_curve, +from sklearn.model_selection import ( + KFold, + LeaveOneOut, + cross_val_predict, + train_test_split, ) +from sklearn.naive_bayes import MultinomialNB +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import LabelEncoder, StandardScaler +from sklearn.svm import LinearSVC +from sklearn.tree import DecisionTreeClassifier from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import _convert_container - +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) +from sklearn.utils.extmath import softmax N_SAMPLES = 200 diff 
--git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index b5fc83d1028b3..6caa8c80bd56e 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -7,52 +7,44 @@ # License: BSD 3 clause import os -import warnings -import sys -import re import pkgutil -from inspect import isgenerator, signature, Parameter -from itertools import product, chain +import re +import sys +import warnings from functools import partial +from inspect import Parameter, isgenerator, signature +from itertools import chain, product -import pytest import numpy as np - -from sklearn.utils import all_estimators -from sklearn.utils._testing import ignore_warnings -from sklearn.exceptions import ConvergenceWarning -from sklearn.exceptions import FitFailedWarning -from sklearn.utils.estimator_checks import check_estimator +import pytest import sklearn - from sklearn.decomposition import PCA -from sklearn.linear_model._base import LinearClassifierMixin -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import Ridge -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import RandomizedSearchCV +from sklearn.exceptions import ConvergenceWarning, FitFailedWarning from sklearn.experimental import enable_halving_search_cv # noqa -from sklearn.model_selection import HalvingGridSearchCV -from sklearn.model_selection import HalvingRandomSearchCV +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.linear_model._base import LinearClassifierMixin +from sklearn.model_selection import ( + GridSearchCV, + HalvingGridSearchCV, + HalvingRandomSearchCV, + RandomizedSearchCV, +) from sklearn.pipeline import make_pipeline - -from sklearn.utils import IS_PYPY +from sklearn.utils import IS_PYPY, all_estimators from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags -from sklearn.utils._testing import ( - SkipTest, - set_random_state, -) +from sklearn.utils._testing import SkipTest, ignore_warnings, set_random_state from sklearn.utils.estimator_checks import ( _construct_instance, - _set_checking_parameters, _get_check_estimator_ids, + _set_checking_parameters, check_class_weight_balanced_linear_classifier, - parametrize_with_checks, check_dataframe_column_names_consistency, + check_estimator, check_n_features_in_after_fitting, check_transformer_get_feature_names_out, check_transformer_get_feature_names_out_pandas, + parametrize_with_checks, ) diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py index 86496d6bd45cf..88252401cab69 100644 --- a/sklearn/tests/test_config.py +++ b/sklearn/tests/test_config.py @@ -1,10 +1,10 @@ import time from concurrent.futures import ThreadPoolExecutor -from joblib import Parallel import pytest -from sklearn import get_config, set_config, config_context +from joblib import Parallel +from sklearn import config_context, get_config, set_config from sklearn.utils.fixes import delayed diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 9ef444c67b3e1..7e22a4e5dc294 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -1,26 +1,23 @@ import numpy as np - import pytest - from scipy import linalg -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal - -from 
sklearn.datasets import make_blobs -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis -from sklearn.discriminant_analysis import _cov -from sklearn.covariance import ledoit_wolf from sklearn.cluster import KMeans - -from sklearn.covariance import ShrunkCovariance -from sklearn.covariance import LedoitWolf - +from sklearn.covariance import LedoitWolf, ShrunkCovariance, ledoit_wolf +from sklearn.datasets import make_blobs +from sklearn.discriminant_analysis import ( + LinearDiscriminantAnalysis, + QuadraticDiscriminantAnalysis, + _cov, +) from sklearn.preprocessing import StandardScaler +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) # Data is just 6 separable points in the plane X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype="f") diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 5e22b425be1ec..bb68e32f0ea34 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -2,31 +2,31 @@ # Raghav RV # License: BSD 3 clause +import importlib import inspect import warnings -import importlib - -from pkgutil import walk_packages from inspect import signature +from pkgutil import walk_packages import numpy as np +import pytest import sklearn -from sklearn.utils import IS_PYPY -from sklearn.utils._testing import check_docstring_parameters -from sklearn.utils._testing import _get_func_name -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _enforce_estimator_tags_y -from sklearn.utils.estimator_checks import _enforce_estimator_tags_x -from sklearn.utils.estimator_checks import _construct_instance -from sklearn.utils.deprecation import _is_deprecated from sklearn.datasets import make_classification from sklearn.linear_model import LogisticRegression from sklearn.preprocessing import FunctionTransformer - -import pytest - +from sklearn.utils import IS_PYPY, all_estimators +from sklearn.utils._testing import ( + _get_func_name, + check_docstring_parameters, + ignore_warnings, +) +from sklearn.utils.deprecation import _is_deprecated +from sklearn.utils.estimator_checks import ( + _construct_instance, + _enforce_estimator_tags_x, + _enforce_estimator_tags_y, +) # walk_packages() ignores DeprecationWarnings, now we need to ignore # FutureWarnings diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index 8ee55f2ef6d78..3aeadfb6f0a1a 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -1,13 +1,14 @@ +import importlib +import inspect +import pkgutil import re from inspect import signature -import pkgutil -import inspect -import importlib from typing import Optional import pytest -from sklearn.utils import all_estimators + import sklearn +from sklearn.utils import all_estimators numpydoc_validation = pytest.importorskip("numpydoc.validate") @@ -327,8 +328,8 @@ def test_docstring(Estimator, method, request): if __name__ == "__main__": - import sys import argparse + import sys parser = argparse.ArgumentParser(description="Validate docstring with numpydoc.") parser.add_argument("import_path", help="Import path to validate") diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 
61f8c2e4190e1..c8cc55980dd84 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -1,17 +1,17 @@ -import pytest - import numpy as np +import pytest import scipy.sparse as sp from sklearn.base import clone -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.stats import _weighted_percentile - from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.exceptions import NotFittedError +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.stats import _weighted_percentile @ignore_warnings diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index 5600cf8706e75..e2cb85985f726 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -1,27 +1,25 @@ -import warnings -import numpy as np -import pickle import copy +import pickle +import warnings +import numpy as np import pytest +from scipy.special import expit from sklearn.datasets import make_regression from sklearn.isotonic import ( - check_increasing, - isotonic_regression, IsotonicRegression, _make_unique, + check_increasing, + isotonic_regression, ) - -from sklearn.utils.validation import check_array +from sklearn.utils import shuffle from sklearn.utils._testing import ( assert_allclose, - assert_array_equal, assert_array_almost_equal, + assert_array_equal, ) -from sklearn.utils import shuffle - -from scipy.special import expit +from sklearn.utils.validation import check_array def test_permutation_invariance(): diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index bcee4781b5927..29bd22f22122f 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -1,20 +1,24 @@ import re import numpy as np -from scipy.sparse import csr_matrix import pytest +from scipy.sparse import csr_matrix -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - -from sklearn.metrics.pairwise import kernel_metrics -from sklearn.kernel_approximation import RBFSampler -from sklearn.kernel_approximation import AdditiveChi2Sampler -from sklearn.kernel_approximation import SkewedChi2Sampler -from sklearn.kernel_approximation import Nystroem -from sklearn.kernel_approximation import PolynomialCountSketch from sklearn.datasets import make_classification -from sklearn.metrics.pairwise import polynomial_kernel, rbf_kernel, chi2_kernel +from sklearn.kernel_approximation import ( + AdditiveChi2Sampler, + Nystroem, + PolynomialCountSketch, + RBFSampler, + SkewedChi2Sampler, +) +from sklearn.metrics.pairwise import ( + chi2_kernel, + kernel_metrics, + polynomial_kernel, + rbf_kernel, +) +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal # generate data rng = np.random.RandomState(0) diff --git a/sklearn/tests/test_kernel_ridge.py b/sklearn/tests/test_kernel_ridge.py index 76a5c77e73be1..e0d2d2cf39574 100644 --- a/sklearn/tests/test_kernel_ridge.py +++ b/sklearn/tests/test_kernel_ridge.py @@ -2,13 +2,10 @@ import scipy.sparse as sp from sklearn.datasets import make_regression -from sklearn.linear_model import Ridge from sklearn.kernel_ridge import KernelRidge +from sklearn.linear_model import Ridge from 
sklearn.metrics.pairwise import pairwise_kernels -from sklearn.utils._testing import ignore_warnings - -from sklearn.utils._testing import assert_array_almost_equal - +from sklearn.utils._testing import assert_array_almost_equal, ignore_warnings X, y = make_regression(n_features=10, random_state=0) Xcsr = sp.csr_matrix(X) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index e743741f6fa43..3e1b846f2f7fb 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -5,23 +5,24 @@ import numpy as np import pytest -from sklearn.base import BaseEstimator -from sklearn.base import is_regressor +from sklearn.base import BaseEstimator, is_regressor from sklearn.datasets import make_classification -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _enforce_estimator_tags_x -from sklearn.utils.estimator_checks import _enforce_estimator_tags_y -from sklearn.utils.validation import check_is_fitted -from sklearn.utils._testing import set_random_state -from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.feature_selection import RFE, RFECV from sklearn.ensemble import BaggingClassifier from sklearn.exceptions import NotFittedError +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.feature_selection import RFE, RFECV +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import MaxAbsScaler, StandardScaler from sklearn.semi_supervised import SelfTrainingClassifier -from sklearn.linear_model import Ridge, LogisticRegression -from sklearn.preprocessing import StandardScaler, MaxAbsScaler +from sklearn.utils import all_estimators +from sklearn.utils._testing import set_random_state +from sklearn.utils.estimator_checks import ( + _enforce_estimator_tags_x, + _enforce_estimator_tags_y, +) +from sklearn.utils.validation import check_is_fitted class DelegatorData: diff --git a/sklearn/tests/test_min_dependencies_readme.py b/sklearn/tests/test_min_dependencies_readme.py index 8b2b548c5bf42..f49df2414cedb 100644 --- a/sklearn/tests/test_min_dependencies_readme.py +++ b/sklearn/tests/test_min_dependencies_readme.py @@ -2,11 +2,12 @@ import os -import re import platform +import re from pathlib import Path import pytest + import sklearn from sklearn._min_dependencies import dependent_packages from sklearn.utils.fixes import parse_version diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index a3621414ae793..bc748ecfd81b4 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -1,45 +1,39 @@ +from re import escape + import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp from numpy.testing import assert_allclose -from re import escape - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._mocking import CheckingClassifier -from sklearn.multiclass import OneVsRestClassifier -from sklearn.multiclass import OneVsOneClassifier -from sklearn.multiclass import OutputCodeClassifier -from sklearn.utils.multiclass import check_classification_targets, type_of_target -from sklearn.utils import ( - check_array, - shuffle, -) - 
-from sklearn.metrics import precision_score -from sklearn.metrics import recall_score - -from sklearn.svm import LinearSVC, SVC -from sklearn.naive_bayes import MultinomialNB +from sklearn import datasets, svm +from sklearn.datasets import load_breast_cancer +from sklearn.exceptions import NotFittedError +from sklearn.impute import SimpleImputer from sklearn.linear_model import ( - LinearRegression, - Lasso, ElasticNet, - Ridge, - Perceptron, + Lasso, + LinearRegression, LogisticRegression, + Perceptron, + Ridge, SGDClassifier, ) -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.neighbors import KNeighborsClassifier +from sklearn.metrics import precision_score, recall_score from sklearn.model_selection import GridSearchCV, cross_val_score +from sklearn.multiclass import ( + OneVsOneClassifier, + OneVsRestClassifier, + OutputCodeClassifier, +) +from sklearn.naive_bayes import MultinomialNB +from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.impute import SimpleImputer -from sklearn import svm -from sklearn.exceptions import NotFittedError -from sklearn import datasets -from sklearn.datasets import load_breast_cancer +from sklearn.svm import SVC, LinearSVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import check_array, shuffle +from sklearn.utils._mocking import CheckingClassifier +from sklearn.utils._testing import assert_almost_equal, assert_array_equal +from sklearn.utils.multiclass import check_classification_targets, type_of_target iris = datasets.load_iris() rng = np.random.RandomState(0) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 25d209223acc1..2e3d50baed0ee 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -1,36 +1,44 @@ -import pytest import numpy as np +import pytest import scipy.sparse as sp -from joblib import cpu_count -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal +from joblib import cpu_count from sklearn import datasets -from sklearn.base import clone -from sklearn.datasets import make_classification -from sklearn.datasets import load_linnerud -from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier +from sklearn.base import ClassifierMixin, clone +from sklearn.datasets import load_linnerud, make_classification +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import ( + GradientBoostingRegressor, + RandomForestClassifier, + StackingRegressor, +) from sklearn.exceptions import NotFittedError -from sklearn.linear_model import Lasso -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import OrthogonalMatchingPursuit -from sklearn.linear_model import Ridge -from sklearn.linear_model import SGDClassifier -from sklearn.linear_model import SGDRegressor +from sklearn.impute import SimpleImputer +from sklearn.linear_model import ( + Lasso, + LogisticRegression, + OrthogonalMatchingPursuit, + Ridge, + SGDClassifier, + SGDRegressor, +) from sklearn.metrics import jaccard_score, mean_squared_error +from sklearn.model_selection import GridSearchCV from sklearn.multiclass import OneVsRestClassifier -from sklearn.multioutput import ClassifierChain, RegressorChain -from sklearn.multioutput import MultiOutputClassifier -from sklearn.multioutput import 
MultiOutputRegressor +from sklearn.multioutput import ( + ClassifierChain, + MultiOutputClassifier, + MultiOutputRegressor, + RegressorChain, +) +from sklearn.pipeline import make_pipeline from sklearn.svm import LinearSVC -from sklearn.base import ClassifierMixin from sklearn.utils import shuffle -from sklearn.model_selection import GridSearchCV -from sklearn.dummy import DummyRegressor, DummyClassifier -from sklearn.pipeline import make_pipeline -from sklearn.impute import SimpleImputer -from sklearn.ensemble import StackingRegressor +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def test_multi_target_regression(): diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 47fd6821ad305..d6b2eddabb83e 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -1,22 +1,24 @@ import re +import warnings import numpy as np -import scipy.sparse import pytest -import warnings +import scipy.sparse from sklearn.datasets import load_digits, load_iris - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import cross_val_score - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - -from sklearn.naive_bayes import GaussianNB, BernoulliNB -from sklearn.naive_bayes import MultinomialNB, ComplementNB -from sklearn.naive_bayes import CategoricalNB +from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.naive_bayes import ( + BernoulliNB, + CategoricalNB, + ComplementNB, + GaussianNB, + MultinomialNB, +) +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) DISCRETE_NAIVE_BAYES_CLASSES = [BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB] ALL_NAIVE_BAYES_CLASSES = DISCRETE_NAIVE_BAYES_CLASSES + [GaussianNB] diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 6913815191ea8..2c4e939dcc6db 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1,43 +1,42 @@ """ Test the pipeline module. 
""" -from tempfile import mkdtemp +import itertools +import re import shutil import time -import re -import itertools +from tempfile import mkdtemp -import pytest import numpy as np +import pytest from scipy import sparse -import joblib +import joblib +from sklearn.base import BaseEstimator, TransformerMixin, clone, is_classifier +from sklearn.cluster import KMeans +from sklearn.datasets import load_iris +from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.exceptions import NotFittedError +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.feature_selection import SelectKBest, f_classif +from sklearn.impute import SimpleImputer +from sklearn.linear_model import Lasso, LinearRegression, LogisticRegression +from sklearn.metrics import accuracy_score, r2_score +from sklearn.neighbors import LocalOutlierFactor +from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline, make_union +from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC from sklearn.utils._testing import ( - assert_allclose, - assert_array_equal, - assert_array_almost_equal, MinimalClassifier, MinimalRegressor, MinimalTransformer, + assert_allclose, + assert_array_almost_equal, + assert_array_equal, ) -from sklearn.exceptions import NotFittedError from sklearn.utils.validation import check_is_fitted -from sklearn.base import clone, is_classifier, BaseEstimator, TransformerMixin -from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union -from sklearn.svm import SVC -from sklearn.neighbors import LocalOutlierFactor -from sklearn.linear_model import LogisticRegression, Lasso -from sklearn.linear_model import LinearRegression -from sklearn.metrics import accuracy_score, r2_score -from sklearn.cluster import KMeans -from sklearn.feature_selection import SelectKBest, f_classif -from sklearn.dummy import DummyRegressor -from sklearn.decomposition import PCA, TruncatedSVD -from sklearn.datasets import load_iris -from sklearn.preprocessing import StandardScaler -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.impute import SimpleImputer iris = load_iris() diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index 4d21090a3e6fb..3b768bec829f4 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -1,25 +1,27 @@ import functools -from typing import List, Any import warnings +from typing import Any, List import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp -from sklearn.metrics import euclidean_distances - -from sklearn.random_projection import johnson_lindenstrauss_min_dim -from sklearn.random_projection import _gaussian_random_matrix -from sklearn.random_projection import _sparse_random_matrix -from sklearn.random_projection import SparseRandomProjection -from sklearn.random_projection import GaussianRandomProjection - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal from sklearn.exceptions import DataDimensionalityWarning +from sklearn.metrics import euclidean_distances +from sklearn.random_projection 
import ( + GaussianRandomProjection, + SparseRandomProjection, + _gaussian_random_matrix, + _sparse_random_matrix, + johnson_lindenstrauss_min_dim, +) +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) all_sparse_random_matrix: List[Any] = [_sparse_random_matrix] all_dense_random_matrix: List[Any] = [_gaussian_random_matrix] diff --git a/sklearn/tree/__init__.py b/sklearn/tree/__init__.py index f7a8fd183c7cc..8cfb42c73e118 100644 --- a/sklearn/tree/__init__.py +++ b/sklearn/tree/__init__.py @@ -3,12 +3,14 @@ classification and regression. """ -from ._classes import BaseDecisionTree -from ._classes import DecisionTreeClassifier -from ._classes import DecisionTreeRegressor -from ._classes import ExtraTreeClassifier -from ._classes import ExtraTreeRegressor -from ._export import export_graphviz, plot_tree, export_text +from ._classes import ( + BaseDecisionTree, + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, +) +from ._export import export_graphviz, export_text, plot_tree __all__ = [ "BaseDecisionTree", diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 79257355a4150..f4be1e16653f2 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -14,39 +14,37 @@ # # License: BSD 3 clause +import copy import numbers import warnings -import copy -from abc import ABCMeta -from abc import abstractmethod +from abc import ABCMeta, abstractmethod from math import ceil import numpy as np from scipy.sparse import issparse -from ..base import BaseEstimator -from ..base import ClassifierMixin -from ..base import clone -from ..base import RegressorMixin -from ..base import is_classifier -from ..base import MultiOutputMixin -from ..utils import Bunch -from ..utils import check_random_state -from ..utils import check_scalar +from ..base import ( + BaseEstimator, + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + clone, + is_classifier, +) +from ..utils import Bunch, check_random_state, check_scalar, compute_sample_weight from ..utils.deprecation import deprecated -from ..utils.validation import _check_sample_weight -from ..utils import compute_sample_weight from ..utils.multiclass import check_classification_targets -from ..utils.validation import check_is_fitted - +from ..utils.validation import _check_sample_weight, check_is_fitted +from . import _criterion, _splitter, _tree from ._criterion import Criterion from ._splitter import Splitter -from ._tree import DepthFirstTreeBuilder -from ._tree import BestFirstTreeBuilder -from ._tree import Tree -from ._tree import _build_pruned_tree_ccp -from ._tree import ccp_pruning_path -from . import _tree, _splitter, _criterion +from ._tree import ( + BestFirstTreeBuilder, + DepthFirstTreeBuilder, + Tree, + _build_pruned_tree_ccp, + ccp_pruning_path, +) __all__ = [ "DecisionTreeClassifier", diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd index 1639b5f4b3195..7ffed4845970e 100644 --- a/sklearn/tree/_criterion.pxd +++ b/sklearn/tree/_criterion.pxd @@ -10,13 +10,15 @@ # See _criterion.pyx for implementation details. 
import numpy as np + cimport numpy as np -from ._tree cimport DTYPE_t # Type of X -from ._tree cimport DOUBLE_t # Type of y, sample_weight -from ._tree cimport SIZE_t # Type for indices and counters -from ._tree cimport INT32_t # Signed 32 bit integer -from ._tree cimport UINT32_t # Unsigned 32 bit integer +from ._tree cimport DOUBLE_t # Type of y, sample_weight +from ._tree cimport DTYPE_t # Type of X +from ._tree cimport INT32_t # Signed 32 bit integer +from ._tree cimport SIZE_t # Type for indices and counters +from ._tree cimport UINT32_t # Unsigned 32 bit integer + cdef class Criterion: # The criterion computes the impurity of a node and the reduction of diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx index 57012fcab2296..72f9be8b02157 100644 --- a/sklearn/tree/_criterion.pyx +++ b/sklearn/tree/_criterion.pyx @@ -12,19 +12,20 @@ # # License: BSD 3 clause -from libc.string cimport memcpy -from libc.string cimport memset from libc.math cimport fabs +from libc.string cimport memcpy, memset import numpy as np + cimport numpy as np + np.import_array() from numpy.math cimport INFINITY from scipy.special.cython_special cimport xlogy -from ._utils cimport log -from ._utils cimport WeightedMedianCalculator +from ._utils cimport WeightedMedianCalculator, log + # EPSILON is used in the Poisson criterion cdef double EPSILON = 10 * np.finfo('double').eps diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py index 4e2e8b58cc370..03bb773901c36 100644 --- a/sklearn/tree/_export.py +++ b/sklearn/tree/_export.py @@ -16,13 +16,10 @@ import numpy as np -from ..utils.validation import check_is_fitted from ..base import is_classifier - -from . import _criterion -from . import _tree -from ._reingold_tilford import buchheim, Tree -from . import DecisionTreeClassifier +from ..utils.validation import check_is_fitted +from . import DecisionTreeClassifier, _criterion, _tree +from ._reingold_tilford import Tree, buchheim def _color_brew(n): diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index cf01fed9cfd7d..d77c5d5cb1cbe 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -10,15 +10,16 @@ # See _splitter.pyx for details. 
import numpy as np + cimport numpy as np from ._criterion cimport Criterion +from ._tree cimport DOUBLE_t # Type of y, sample_weight +from ._tree cimport DTYPE_t # Type of X +from ._tree cimport INT32_t # Signed 32 bit integer +from ._tree cimport SIZE_t # Type for indices and counters +from ._tree cimport UINT32_t # Unsigned 32 bit integer -from ._tree cimport DTYPE_t # Type of X -from ._tree cimport DOUBLE_t # Type of y, sample_weight -from ._tree cimport SIZE_t # Type for indices and counters -from ._tree cimport INT32_t # Signed 32 bit integer -from ._tree cimport UINT32_t # Unsigned 32 bit integer cdef struct SplitRecord: # Data to track sample split diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 5d0b6204deb13..5e0779003a436 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -11,25 +11,22 @@ # # License: BSD 3 clause -from ._criterion cimport Criterion +from libc.stdlib cimport free, qsort +from libc.string cimport memcpy, memset -from libc.stdlib cimport free -from libc.stdlib cimport qsort -from libc.string cimport memcpy -from libc.string cimport memset +from ._criterion cimport Criterion import numpy as np + cimport numpy as np + np.import_array() from scipy.sparse import csc_matrix -from ._utils cimport log -from ._utils cimport rand_int -from ._utils cimport rand_uniform -from ._utils cimport RAND_R_MAX -from ._utils cimport safe_realloc from ..utils._sorting cimport simultaneous_sort +from ._utils cimport RAND_R_MAX, log, rand_int, rand_uniform, safe_realloc + cdef double INFINITY = np.inf diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index 0874187ee98ae..1639ba9353561 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -11,6 +11,7 @@ # See _tree.pyx for details. 
import numpy as np + cimport numpy as np ctypedef np.npy_float32 DTYPE_t # Type of X @@ -19,8 +20,8 @@ ctypedef np.npy_intp SIZE_t # Type for indices and counters ctypedef np.npy_int32 INT32_t # Signed 32 bit integer ctypedef np.npy_uint32 UINT32_t # Unsigned 32 bit integer -from ._splitter cimport Splitter -from ._splitter cimport SplitRecord +from ._splitter cimport SplitRecord, Splitter + cdef struct Node: # Base storage structure for the nodes in a Tree object diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 85c44b5eaf9b8..d8e9d889bcf57 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -13,28 +13,26 @@ # License: BSD 3 clause from cpython cimport Py_INCREF, PyObject, PyTypeObject - -from libc.stdlib cimport free from libc.math cimport fabs -from libc.string cimport memcpy -from libc.string cimport memset from libc.stdint cimport SIZE_MAX -from libcpp.vector cimport vector -from libcpp.algorithm cimport pop_heap -from libcpp.algorithm cimport push_heap +from libc.stdlib cimport free +from libc.string cimport memcpy, memset from libcpp cimport bool +from libcpp.algorithm cimport pop_heap, push_heap +from libcpp.vector cimport vector import struct import numpy as np + cimport numpy as np + np.import_array() -from scipy.sparse import issparse -from scipy.sparse import csr_matrix +from scipy.sparse import csr_matrix, issparse + +from ._utils cimport safe_realloc, sizet_ptr_to_ndarray -from ._utils cimport safe_realloc -from ._utils cimport sizet_ptr_to_ndarray cdef extern from "numpy/arrayobject.h": object PyArray_NewFromDescr(PyTypeObject* subtype, np.dtype descr, @@ -59,6 +57,7 @@ cdef extern from "" namespace "std" nogil: from numpy import float32 as DTYPE from numpy import float64 as DOUBLE + cdef double INFINITY = np.inf cdef double EPSILON = np.finfo('double').eps diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index fe4aca67d7b52..d329fd5d0b6a4 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -9,9 +9,11 @@ # See _utils.pyx for details. import numpy as np + cimport numpy as np -from ._tree cimport Node + from ..neighbors._quad_tree cimport Cell +from ._tree cimport Node ctypedef np.npy_float32 DTYPE_t # Type of X ctypedef np.npy_float64 DOUBLE_t # Type of y, sample_weight diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx index ba4c0f716a985..02b0897faa45f 100644 --- a/sklearn/tree/_utils.pyx +++ b/sklearn/tree/_utils.pyx @@ -7,13 +7,13 @@ # # License: BSD 3 clause -from libc.stdlib cimport free -from libc.stdlib cimport malloc -from libc.stdlib cimport realloc from libc.math cimport log as ln +from libc.stdlib cimport free, malloc, realloc import numpy as np + cimport numpy as np + np.import_array() from ..utils._random cimport our_rand_r diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index d3b082a927048..f2cbfb0ce676e 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -1,18 +1,23 @@ """ Testing for export functions of decision trees (sklearn.tree.export). 
""" +from io import StringIO from re import finditer, search from textwrap import dedent -from numpy.random import RandomState import pytest +from numpy.random import RandomState from sklearn.base import is_classifier -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.ensemble import GradientBoostingClassifier -from sklearn.tree import export_graphviz, plot_tree, export_text -from io import StringIO from sklearn.exceptions import NotFittedError +from sklearn.tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, + export_graphviz, + export_text, + plot_tree, +) # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/tree/tests/test_reingold_tilford.py b/sklearn/tree/tests/test_reingold_tilford.py index 8f38c997a48d7..bf0ce3ce2cffc 100644 --- a/sklearn/tree/tests/test_reingold_tilford.py +++ b/sklearn/tree/tests/test_reingold_tilford.py @@ -1,6 +1,7 @@ import numpy as np import pytest -from sklearn.tree._reingold_tilford import buchheim, Tree + +from sklearn.tree._reingold_tilford import Tree, buchheim simple_tree = Tree("", 0, Tree("", 1), Tree("", 2)) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index a0c2f978ed147..b0b3a52a2e251 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -2,64 +2,50 @@ Testing for the tree module (sklearn.tree). """ import copy +import copyreg +import io import pickle -from itertools import product import struct -import io -import copyreg +from itertools import product -import pytest import numpy as np +import pytest +from joblib.numpy_pickle import NumpyPickler from numpy.testing import assert_allclose -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import coo_matrix +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix import joblib -from joblib.numpy_pickle import NumpyPickler - -from sklearn.random_projection import _sparse_random_matrix - +from sklearn import datasets, tree from sklearn.dummy import DummyRegressor - -from sklearn.metrics import accuracy_score -from sklearn.metrics import mean_squared_error -from sklearn.metrics import mean_poisson_deviance - +from sklearn.exceptions import NotFittedError +from sklearn.metrics import accuracy_score, mean_poisson_deviance, mean_squared_error from sklearn.model_selection import train_test_split - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import create_memmap_backed_data -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import skip_if_32bit - +from sklearn.random_projection import _sparse_random_matrix +from sklearn.tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, +) +from sklearn.tree._classes import CRITERIA_CLF, CRITERIA_REG +from sklearn.tree._tree import NODE_DTYPE, TREE_LEAF, TREE_UNDEFINED +from sklearn.tree._tree import Tree as CythonTree +from sklearn.tree._tree import ( + _check_n_classes, + _check_node_ndarray, + _check_value_ndarray, +) +from sklearn.utils import _IS_32BIT, compute_sample_weight +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + create_memmap_backed_data, + ignore_warnings, + skip_if_32bit, +) from sklearn.utils.estimator_checks import check_sample_weights_invariance from 
sklearn.utils.validation import check_random_state -from sklearn.utils import _IS_32BIT - -from sklearn.exceptions import NotFittedError - -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree import DecisionTreeRegressor -from sklearn.tree import ExtraTreeClassifier -from sklearn.tree import ExtraTreeRegressor - -from sklearn import tree -from sklearn.tree._tree import TREE_LEAF, TREE_UNDEFINED -from sklearn.tree._tree import Tree as CythonTree -from sklearn.tree._tree import _check_n_classes -from sklearn.tree._tree import _check_value_ndarray -from sklearn.tree._tree import _check_node_ndarray -from sklearn.tree._tree import NODE_DTYPE - -from sklearn.tree._classes import CRITERIA_CLF -from sklearn.tree._classes import CRITERIA_REG -from sklearn import datasets - -from sklearn.utils import compute_sample_weight - CLF_CRITERIONS = ("gini", "log_loss") REG_CRITERIONS = ("squared_error", "absolute_error", "friedman_mse", "poisson") diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index aa056e92b3d12..13d9f56a880e6 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1,48 +1,45 @@ """ The :mod:`sklearn.utils` module includes various utilities. """ -import pkgutil import inspect -from importlib import import_module -from operator import itemgetter -from collections.abc import Sequence -from contextlib import contextmanager -from itertools import compress -from itertools import islice import math import numbers +import pkgutil import platform import struct import timeit +import warnings +from collections.abc import Sequence +from contextlib import contextmanager, suppress +from importlib import import_module +from itertools import compress, islice +from operator import itemgetter from pathlib import Path -from contextlib import suppress -import warnings import numpy as np from scipy.sparse import issparse -from .murmurhash import murmurhash3_32 -from .class_weight import compute_class_weight, compute_sample_weight -from . import _joblib +from .. import get_config from ..exceptions import DataConversionWarning +from . import _joblib +from ._bunch import Bunch +from ._estimator_html_repr import estimator_html_repr +from .class_weight import compute_class_weight, compute_sample_weight from .deprecation import deprecated from .fixes import parse_version, threadpool_info -from ._estimator_html_repr import estimator_html_repr +from .murmurhash import murmurhash3_32 from .validation import ( as_float_array, assert_all_finite, - check_random_state, - column_or_1d, check_array, check_consistent_length, + check_random_state, + check_scalar, + check_symmetric, check_X_y, + column_or_1d, indexable, - check_symmetric, - check_scalar, ) -from .. import get_config -from ._bunch import Bunch - # Do not deprecate parallel_backend and register_parallel_backend as they are # needed to tune `scikit-learn` behavior and have different effect if called @@ -1171,14 +1168,14 @@ def all_estimators(type_filter=None): and ``class`` is the actual type of the class. 
""" # lazy import to avoid circular imports from sklearn.base - from ._testing import ignore_warnings from ..base import ( BaseEstimator, ClassifierMixin, + ClusterMixin, RegressorMixin, TransformerMixin, - ClusterMixin, ) + from ._testing import ignore_warnings def is_abstract(c): if not (hasattr(c, "__abstractmethods__")): diff --git a/sklearn/utils/_cython_blas.pyx b/sklearn/utils/_cython_blas.pyx index c15e66ee02ce1..ed188eb7fbbd1 100644 --- a/sklearn/utils/_cython_blas.pyx +++ b/sklearn/utils/_cython_blas.pyx @@ -1,17 +1,28 @@ from cython cimport floating - -from scipy.linalg.cython_blas cimport sdot, ddot -from scipy.linalg.cython_blas cimport sasum, dasum -from scipy.linalg.cython_blas cimport saxpy, daxpy -from scipy.linalg.cython_blas cimport snrm2, dnrm2 -from scipy.linalg.cython_blas cimport scopy, dcopy -from scipy.linalg.cython_blas cimport sscal, dscal -from scipy.linalg.cython_blas cimport srotg, drotg -from scipy.linalg.cython_blas cimport srot, drot -from scipy.linalg.cython_blas cimport sgemv, dgemv -from scipy.linalg.cython_blas cimport sger, dger -from scipy.linalg.cython_blas cimport sgemm, dgemm - +from scipy.linalg.cython_blas cimport ( + dasum, + daxpy, + dcopy, + ddot, + dgemm, + dgemv, + dger, + dnrm2, + drot, + drotg, + dscal, + sasum, + saxpy, + scopy, + sdot, + sgemm, + sgemv, + sger, + snrm2, + srot, + srotg, + sscal, +) ################ # BLAS Level 1 # diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py index de48890fcaacf..5affa4616be01 100644 --- a/sklearn/utils/_encode.py +++ b/sklearn/utils/_encode.py @@ -1,8 +1,9 @@ -from contextlib import suppress from collections import Counter +from contextlib import suppress from typing import NamedTuple import numpy as np + from . import is_scalar_nan diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py index f8911b5c38b08..4f10600053d1b 100644 --- a/sklearn/utils/_estimator_html_repr.py +++ b/sklearn/utils/_estimator_html_repr.py @@ -1,8 +1,7 @@ -from contextlib import closing -from contextlib import suppress +import html +from contextlib import closing, suppress from io import StringIO from string import Template -import html from .. import config_context diff --git a/sklearn/utils/_fast_dict.pxd b/sklearn/utils/_fast_dict.pxd index 1bcc149a54ab5..df2c76b8377e4 100644 --- a/sklearn/utils/_fast_dict.pxd +++ b/sklearn/utils/_fast_dict.pxd @@ -5,10 +5,9 @@ Uses C++ map containers for fast dict-like behavior with keys being integers, and values float. """ -from libcpp.map cimport map as cpp_map - # Import the C-level symbols of numpy cimport numpy as cnp +from libcpp.map cimport map as cpp_map ctypedef cnp.float64_t DTYPE_t diff --git a/sklearn/utils/_fast_dict.pyx b/sklearn/utils/_fast_dict.pyx index 6d7e62eefc07f..7651d85b1a6a1 100644 --- a/sklearn/utils/_fast_dict.pyx +++ b/sklearn/utils/_fast_dict.pyx @@ -8,14 +8,16 @@ integers, and values float. cimport cython # C++ -from cython.operator cimport dereference as deref, preincrement as inc, \ - predecrement as dec -from libcpp.utility cimport pair +from cython.operator cimport dereference as deref +from cython.operator cimport predecrement as dec +from cython.operator cimport preincrement as inc from libcpp.map cimport map as cpp_map +from libcpp.utility cimport pair import numpy as np # Import the C-level symbols of numpy + cimport numpy as np # Numpy must be initialized. 
When using numpy from C or Cython you must @@ -68,7 +70,7 @@ cdef class IntFloatDict: # while it != end: # yield deref(it).first, deref(it).second # inc(it) - + def __iter__(self): cdef int size = self.my_map.size() cdef ITYPE_t [:] keys = np.empty(size, dtype=np.intp) @@ -147,4 +149,3 @@ def argmin(IntFloatDict d): min_key = deref(it).first inc(it) return min_key, min_value - diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py index 8cbe084c94992..590fdc6170c64 100644 --- a/sklearn/utils/_joblib.py +++ b/sklearn/utils/_joblib.py @@ -5,13 +5,20 @@ # joblib imports may raise DeprecationWarning on certain Python # versions import joblib - from joblib import logger - from joblib import dump, load - from joblib import __version__ - from joblib import effective_n_jobs - from joblib import hash - from joblib import cpu_count, Parallel, Memory, delayed - from joblib import parallel_backend, register_parallel_backend + from joblib import ( + Memory, + Parallel, + __version__, + cpu_count, + delayed, + dump, + effective_n_jobs, + hash, + load, + logger, + parallel_backend, + register_parallel_backend, + ) __all__ = [ diff --git a/sklearn/utils/_logistic_sigmoid.pyx b/sklearn/utils/_logistic_sigmoid.pyx index c2ba685dbfcbd..8e139264fa313 100644 --- a/sklearn/utils/_logistic_sigmoid.pyx +++ b/sklearn/utils/_logistic_sigmoid.pyx @@ -1,6 +1,7 @@ -from libc.math cimport log, exp +from libc.math cimport exp, log import numpy as np + cimport numpy as np np.import_array() diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py index d57cf839d962f..07332bf1edbd4 100644 --- a/sklearn/utils/_mask.py +++ b/sklearn/utils/_mask.py @@ -1,6 +1,7 @@ +from contextlib import suppress + import numpy as np from scipy import sparse as sp -from contextlib import suppress from . import is_scalar_nan from .fixes import _object_dtype_isnan diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py index c7451dce1fbc5..7edf9b7fc9ac3 100644 --- a/sklearn/utils/_mocking.py +++ b/sklearn/utils/_mocking.py @@ -1,8 +1,7 @@ import numpy as np from ..base import BaseEstimator, ClassifierMixin -from .validation import _check_sample_weight, _num_samples, check_array -from .validation import check_is_fitted +from .validation import _check_sample_weight, _num_samples, check_array, check_is_fitted class ArraySlicingWrapper: diff --git a/sklearn/utils/_pprint.py b/sklearn/utils/_pprint.py index c96b1ce764c4a..cea1510746cbe 100644 --- a/sklearn/utils/_pprint.py +++ b/sklearn/utils/_pprint.py @@ -67,8 +67,8 @@ import pprint from collections import OrderedDict -from ..base import BaseEstimator from .._config import get_config +from ..base import BaseEstimator from . import is_scalar_nan diff --git a/sklearn/utils/_random.pxd b/sklearn/utils/_random.pxd index 73b5505bc0e80..51b48deacdf72 100644 --- a/sklearn/utils/_random.pxd +++ b/sklearn/utils/_random.pxd @@ -4,7 +4,9 @@ import numpy as np + cimport numpy as cnp + ctypedef cnp.npy_uint32 UINT32_t cdef inline UINT32_t DEFAULT_SEED = 1 diff --git a/sklearn/utils/_random.pyx b/sklearn/utils/_random.pyx index 0fb3822807eca..5fec389608a6f 100644 --- a/sklearn/utils/_random.pyx +++ b/sklearn/utils/_random.pyx @@ -13,11 +13,14 @@ The module contains: cimport cython import numpy as np + cimport numpy as cnp + cnp.import_array() from . 
import check_random_state + cdef UINT32_t DEFAULT_SEED = 1 diff --git a/sklearn/utils/_readonly_array_wrapper.pyx b/sklearn/utils/_readonly_array_wrapper.pyx index 2c81330df2eb0..842ab3a0eba94 100644 --- a/sklearn/utils/_readonly_array_wrapper.pyx +++ b/sklearn/utils/_readonly_array_wrapper.pyx @@ -12,11 +12,11 @@ This way, we can use it on arrays that we don't touch. # TODO: Remove with Cython >= 3.0 which supports const memoryviews for fused types. from cpython cimport Py_buffer -from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_WRITABLE +from cpython.buffer cimport PyBUF_WRITABLE, PyBuffer_Release, PyObject_GetBuffer import numpy as np -cimport numpy as np +cimport numpy as np np.import_array() diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py index dbef0d5bb4bcf..b387e2fd6f648 100644 --- a/sklearn/utils/_show_versions.py +++ b/sklearn/utils/_show_versions.py @@ -7,10 +7,9 @@ import platform import sys -from ..utils.fixes import threadpool_info -from .. import __version__ - +from .. import __version__ +from ..utils.fixes import threadpool_info from ._openmp_helpers import _openmp_parallelism_enabled @@ -68,7 +67,7 @@ def _get_deps_info(): # therefore on our CI. # https://github.com/conda-forge/conda-forge-pinning-feedstock/issues/2089 try: - from pkg_resources import get_distribution, DistributionNotFound + from pkg_resources import DistributionNotFound, get_distribution for modname in deps: try: @@ -82,7 +81,7 @@ def _get_deps_info(): deps_info[modname] = None else: - from importlib.metadata import version, PackageNotFoundError + from importlib.metadata import PackageNotFoundError, version for modname in deps: try: diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd index 412d67c479fac..19f1594e3fe55 100644 --- a/sklearn/utils/_sorting.pxd +++ b/sklearn/utils/_sorting.pxd @@ -1,6 +1,7 @@ +from cython cimport floating + from ._typedefs cimport DTYPE_t, ITYPE_t -from cython cimport floating cdef int simultaneous_sort( floating *dist, diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx index 367448b5cb91b..22da3b95d79f2 100644 --- a/sklearn/utils/_sorting.pyx +++ b/sklearn/utils/_sorting.pyx @@ -1,5 +1,6 @@ from cython cimport floating + cdef inline void dual_swap( floating* darr, ITYPE_t *iarr, diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 453f3437307a9..72e8e700c9525 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -10,57 +10,52 @@ # Giorgio Patrini # Thierry Guillemot # License: BSD 3 clause +import atexit +import contextlib +import functools +import inspect import os import os.path as op -import inspect -import warnings +import re +import shutil import sys -import functools import tempfile -from subprocess import check_output, STDOUT, CalledProcessError -from subprocess import TimeoutExpired -import re -import contextlib +import unittest +import warnings from collections.abc import Iterable - -import scipy as sp from functools import wraps from inspect import signature - -import shutil -import atexit -import unittest +from subprocess import STDOUT, CalledProcessError, TimeoutExpired, check_output from unittest import TestCase +import scipy as sp + # WindowsError only exist on Windows try: WindowsError except NameError: WindowsError = None -from numpy.testing import assert_allclose as np_assert_allclose -from numpy.testing import assert_almost_equal -from numpy.testing import assert_approx_equal -from numpy.testing import assert_array_equal -from 
numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_less import numpy as np -import joblib +from numpy.testing import assert_allclose as np_assert_allclose +from numpy.testing import ( + assert_almost_equal, + assert_approx_equal, + assert_array_almost_equal, + assert_array_equal, + assert_array_less, +) +import joblib import sklearn from sklearn.utils import ( - IS_PYPY, _IS_32BIT, - deprecated, + IS_PYPY, _in_unstable_openblas_configuration, + deprecated, ) from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.validation import ( - check_array, - check_is_fitted, - check_X_y, -) - +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y __all__ = [ "assert_raises", diff --git a/sklearn/utils/_vector_sentinel.pxd b/sklearn/utils/_vector_sentinel.pxd index 5fa0f6ad8d00a..3f9ca3120002d 100644 --- a/sklearn/utils/_vector_sentinel.pxd +++ b/sklearn/utils/_vector_sentinel.pxd @@ -1,7 +1,7 @@ cimport numpy as np - from libcpp.vector cimport vector -from ..utils._typedefs cimport ITYPE_t, DTYPE_t, INT32TYPE_t, INT64TYPE_t + +from ..utils._typedefs cimport DTYPE_t, INT32TYPE_t, INT64TYPE_t, ITYPE_t ctypedef fused vector_typed: vector[DTYPE_t] diff --git a/sklearn/utils/_vector_sentinel.pyx b/sklearn/utils/_vector_sentinel.pyx index 0938ada0f56c1..0588a55bcd7f8 100644 --- a/sklearn/utils/_vector_sentinel.pyx +++ b/sklearn/utils/_vector_sentinel.pyx @@ -1,8 +1,8 @@ -from cython.operator cimport dereference as deref -from cpython.ref cimport Py_INCREF cimport numpy as np +from cpython.ref cimport Py_INCREF +from cython.operator cimport dereference as deref -from ._typedefs cimport DTYPECODE, ITYPECODE, INT32TYPECODE, INT64TYPECODE +from ._typedefs cimport DTYPECODE, INT32TYPECODE, INT64TYPECODE, ITYPECODE np.import_array() diff --git a/sklearn/utils/arrayfuncs.pyx b/sklearn/utils/arrayfuncs.pyx index f494499923c71..f6509b81dabcb 100644 --- a/sklearn/utils/arrayfuncs.pyx +++ b/sklearn/utils/arrayfuncs.pyx @@ -4,13 +4,15 @@ Small collection of auxiliary functions that operate on arrays """ cimport numpy as np -import numpy as np + +import numpy as np + cimport cython from cython cimport floating -from libc.math cimport fabs from libc.float cimport DBL_MAX, FLT_MAX +from libc.math cimport fabs -from ._cython_blas cimport _copy, _rotg, _rot +from ._cython_blas cimport _copy, _rot, _rotg ctypedef np.float64_t DOUBLE @@ -54,7 +56,7 @@ def cholesky_delete(np.ndarray[floating, ndim=2] L, int go_out): floating c, s floating *L1 int i - + if floating is float: m /= sizeof(float) else: diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index bdcee747129d9..ef0e7c13612f3 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -3,7 +3,6 @@ # License: BSD 3 clause import numpy as np - from scipy import sparse diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index 2ee07154dc49b..a1c941bbefc09 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -1,6 +1,5 @@ -import warnings import functools - +import warnings __all__ = ["deprecated"] diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 33cd54a5cb4ac..4c538bab992e3 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1,7 +1,7 @@ -import types -import warnings import pickle import re +import types +import warnings from copy import deepcopy from functools import partial, wraps from inspect import signature @@ 
-9,68 +9,59 @@ import numpy as np from scipy import sparse from scipy.stats import rankdata + import joblib -from . import IS_PYPY from .. import config_context -from ._testing import _get_args -from ._testing import assert_raise_message -from ._testing import assert_array_equal -from ._testing import assert_array_almost_equal -from ._testing import assert_allclose -from ._testing import assert_allclose_dense_sparse -from ._testing import assert_array_less -from ._testing import set_random_state -from ._testing import SkipTest -from ._testing import ignore_warnings -from ._testing import create_memmap_backed_data -from ._testing import raises -from . import is_scalar_nan - -from ..linear_model import LinearRegression -from ..linear_model import LogisticRegression -from ..linear_model import RANSACRegressor -from ..linear_model import Ridge -from ..linear_model import SGDRegressor - from ..base import ( - clone, ClusterMixin, + RegressorMixin, + clone, is_classifier, - is_regressor, is_outlier_detector, - RegressorMixin, -) - -from ..metrics import accuracy_score, adjusted_rand_score, f1_score -from ..random_projection import BaseRandomProjection -from ..feature_selection import SelectKBest -from ..feature_selection import SelectFromModel -from ..pipeline import make_pipeline -from ..exceptions import DataConversionWarning -from ..exceptions import NotFittedError -from ..exceptions import SkipTestWarning -from ..model_selection import train_test_split -from ..model_selection import ShuffleSplit -from ..model_selection._validation import _safe_split -from ..metrics.pairwise import rbf_kernel, linear_kernel, pairwise_distances -from ..utils.fixes import threadpool_info -from ..utils.validation import check_is_fitted - -from . import shuffle -from ._tags import ( - _DEFAULT_TAGS, - _safe_tags, + is_regressor, ) -from .validation import has_fit_parameter, _num_samples -from ..preprocessing import StandardScaler -from ..preprocessing import scale from ..datasets import ( load_iris, make_blobs, make_multilabel_classification, make_regression, ) +from ..exceptions import DataConversionWarning, NotFittedError, SkipTestWarning +from ..feature_selection import SelectFromModel, SelectKBest +from ..linear_model import ( + LinearRegression, + LogisticRegression, + RANSACRegressor, + Ridge, + SGDRegressor, +) +from ..metrics import accuracy_score, adjusted_rand_score, f1_score +from ..metrics.pairwise import linear_kernel, pairwise_distances, rbf_kernel +from ..model_selection import ShuffleSplit, train_test_split +from ..model_selection._validation import _safe_split +from ..pipeline import make_pipeline +from ..preprocessing import StandardScaler, scale +from ..random_projection import BaseRandomProjection +from ..utils.fixes import threadpool_info +from ..utils.validation import check_is_fitted +from . 
import IS_PYPY, is_scalar_nan, shuffle +from ._tags import _DEFAULT_TAGS, _safe_tags +from ._testing import ( + SkipTest, + _get_args, + assert_allclose, + assert_allclose_dense_sparse, + assert_array_almost_equal, + assert_array_equal, + assert_array_less, + assert_raise_message, + create_memmap_backed_data, + ignore_warnings, + raises, + set_random_state, +) +from .validation import _num_samples, has_fit_parameter REGRESSION_DATASET = None CROSS_DECOMPOSITION = ["PLSCanonical", "PLSRegression", "CCA", "PLSSVD"] diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index b0074ae7e3a18..8953b73515dec 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -10,18 +10,19 @@ # # License: BSD 3 clause -from functools import update_wrapper import functools +from functools import update_wrapper -import sklearn import numpy as np import scipy import scipy.stats import threadpoolctl + +import sklearn + from .._config import config_context, get_config from ..externals._packaging.version import parse as parse_version - np_version = parse_version(np.__version__) sp_version = parse_version(scipy.__version__) @@ -35,9 +36,12 @@ from ..externals._lobpcg import lobpcg # type: ignore # noqa try: - from scipy.optimize._linesearch import line_search_wolfe2, line_search_wolfe1 + from scipy.optimize._linesearch import line_search_wolfe1, line_search_wolfe2 except ImportError: # SciPy < 1.8 - from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa + from scipy.optimize.linesearch import ( # type: ignore # noqa + line_search_wolfe1, + line_search_wolfe2, + ) def _object_dtype_isnan(X): diff --git a/sklearn/utils/graph.py b/sklearn/utils/graph.py index 78cdf8d31fdca..f7ef0a051e3c3 100644 --- a/sklearn/utils/graph.py +++ b/sklearn/utils/graph.py @@ -13,8 +13,8 @@ import numpy as np from scipy import sparse -from .deprecation import deprecated from ..metrics.pairwise import pairwise_distances +from .deprecation import deprecated ############################################################################### diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 1cee8d1d42cf4..8b3aa3b5bcd4a 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -2,20 +2,19 @@ # Author: Joel Nothman # Andreas Mueller # License: BSD -from typing import List, Any -from types import MethodType import warnings -from functools import wraps - from abc import ABCMeta, abstractmethod +from contextlib import suppress +from functools import update_wrapper, wraps from operator import attrgetter -from functools import update_wrapper +from types import MethodType +from typing import Any, List + import numpy as np -from contextlib import suppress +from ..base import BaseEstimator from ..utils import _safe_indexing from ..utils._tags import _safe_tags -from ..base import BaseEstimator __all__ = ["available_if", "if_delegate_has_method"] diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index 5311076e64eb8..d6b1d43ef9e75 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -6,17 +6,14 @@ ========================================== """ +import warnings from collections.abc import Sequence from itertools import chain -import warnings - -from scipy.sparse import issparse -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix import numpy as np +from scipy.sparse import dok_matrix, issparse, lil_matrix -from .validation import check_array, _assert_all_finite +from 
.validation import _assert_all_finite, check_array def _unique_multiclass(y): diff --git a/sklearn/utils/murmurhash.pyx b/sklearn/utils/murmurhash.pyx index dc9c3da08906f..0e6a71e82002e 100644 --- a/sklearn/utils/murmurhash.pyx +++ b/sklearn/utils/murmurhash.pyx @@ -17,8 +17,10 @@ and can be found here: cimport cython cimport numpy as np + import numpy as np + cdef extern from "src/MurmurHash3.h": void MurmurHash3_x86_32(void *key, int len, np.uint32_t seed, void *out) void MurmurHash3_x86_128(void *key, int len, np.uint32_t seed, void *out) diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index 7e9b864afe043..68a1ae1dddb98 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -13,11 +13,12 @@ # Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour # License: BSD -import numpy as np import warnings -from .fixes import line_search_wolfe1, line_search_wolfe2 +import numpy as np + from ..exceptions import ConvergenceWarning +from .fixes import line_search_wolfe1, line_search_wolfe2 class _LineSearchError(RuntimeError): diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py index e3bdf2c6c7298..19e3a8c89addb 100644 --- a/sklearn/utils/random.py +++ b/sklearn/utils/random.py @@ -1,9 +1,10 @@ # Author: Hamzeh Alsalhi # # License: BSD 3 clause +import array + import numpy as np import scipy.sparse as sp -import array from . import check_random_state from ._random import sample_without_replacement diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py index d53741c044c47..95f1bc36b2b1a 100644 --- a/sklearn/utils/sparsefuncs.py +++ b/sklearn/utils/sparsefuncs.py @@ -3,15 +3,13 @@ # Giorgio Patrini # # License: BSD 3 clause -import scipy.sparse as sp import numpy as np +import scipy.sparse as sp -from .sparsefuncs_fast import ( - csr_mean_variance_axis0 as _csr_mean_var_axis0, - csc_mean_variance_axis0 as _csc_mean_var_axis0, - incr_mean_variance_axis0 as _incr_mean_var_axis0, -) from ..utils.validation import _check_sample_weight +from .sparsefuncs_fast import csc_mean_variance_axis0 as _csc_mean_var_axis0 +from .sparsefuncs_fast import csr_mean_variance_axis0 as _csr_mean_var_axis0 +from .sparsefuncs_fast import incr_mean_variance_axis0 as _incr_mean_var_axis0 def _raise_typeerror(X): diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx index ee12730d02b2d..eb5fe00e20928 100644 --- a/sklearn/utils/sparsefuncs_fast.pyx +++ b/sklearn/utils/sparsefuncs_fast.pyx @@ -8,9 +8,11 @@ #!python -from libc.math cimport fabs, sqrt, pow cimport numpy as np +from libc.math cimport fabs, pow, sqrt + import numpy as np + cimport cython from cython cimport floating from numpy.math cimport isnan diff --git a/sklearn/utils/tests/test_arrayfuncs.py b/sklearn/utils/tests/test_arrayfuncs.py index 5c43e480d395c..b0a02e13d1639 100644 --- a/sklearn/utils/tests/test_arrayfuncs.py +++ b/sklearn/utils/tests/test_arrayfuncs.py @@ -1,5 +1,5 @@ -import pytest import numpy as np +import pytest from sklearn.utils._testing import assert_allclose from sklearn.utils.arrayfuncs import min_pos diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index 61f43c69050d1..9072f951eea7c 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -6,11 +6,8 @@ from sklearn.datasets import make_blobs from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier - -from sklearn.utils.class_weight import 
compute_class_weight -from sklearn.utils.class_weight import compute_sample_weight -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_almost_equal, assert_array_almost_equal +from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight def test_compute_class_weight(): diff --git a/sklearn/utils/tests/test_cython_blas.py b/sklearn/utils/tests/test_cython_blas.py index 1b311f5160db5..e57bfc3ec5a9c 100644 --- a/sklearn/utils/tests/test_cython_blas.py +++ b/sklearn/utils/tests/test_cython_blas.py @@ -1,21 +1,24 @@ -import pytest - import numpy as np +import pytest +from sklearn.utils._cython_blas import ( + ColMajor, + NoTrans, + RowMajor, + Trans, + _asum_memview, + _axpy_memview, + _copy_memview, + _dot_memview, + _gemm_memview, + _gemv_memview, + _ger_memview, + _nrm2_memview, + _rot_memview, + _rotg_memview, + _scal_memview, +) from sklearn.utils._testing import assert_allclose -from sklearn.utils._cython_blas import _dot_memview -from sklearn.utils._cython_blas import _asum_memview -from sklearn.utils._cython_blas import _axpy_memview -from sklearn.utils._cython_blas import _nrm2_memview -from sklearn.utils._cython_blas import _copy_memview -from sklearn.utils._cython_blas import _scal_memview -from sklearn.utils._cython_blas import _rotg_memview -from sklearn.utils._cython_blas import _rot_memview -from sklearn.utils._cython_blas import _gemv_memview -from sklearn.utils._cython_blas import _ger_memview -from sklearn.utils._cython_blas import _gemm_memview -from sklearn.utils._cython_blas import RowMajor, ColMajor -from sklearn.utils._cython_blas import Trans, NoTrans def _numpy_to_cython(dtype): diff --git a/sklearn/utils/tests/test_cython_templating.py b/sklearn/utils/tests/test_cython_templating.py index eeb8319e07415..f5c9fa7a9087e 100644 --- a/sklearn/utils/tests/test_cython_templating.py +++ b/sklearn/utils/tests/test_cython_templating.py @@ -1,5 +1,7 @@ import pathlib + import pytest + import sklearn diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py index e39486cc0318a..88fe43ffb7b16 100644 --- a/sklearn/utils/tests/test_deprecation.py +++ b/sklearn/utils/tests/test_deprecation.py @@ -4,10 +4,10 @@ import pickle -from sklearn.utils.deprecation import _is_deprecated -from sklearn.utils.deprecation import deprecated import pytest +from sklearn.utils.deprecation import _is_deprecated, deprecated + @deprecated("qwerty") class MockClass1: diff --git a/sklearn/utils/tests/test_encode.py b/sklearn/utils/tests/test_encode.py index 083db25b7ca80..9118eb56f0ba4 100644 --- a/sklearn/utils/tests/test_encode.py +++ b/sklearn/utils/tests/test_encode.py @@ -4,10 +4,7 @@ import pytest from numpy.testing import assert_array_equal -from sklearn.utils._encode import _unique -from sklearn.utils._encode import _encode -from sklearn.utils._encode import _check_unknown -from sklearn.utils._encode import _get_counts +from sklearn.utils._encode import _check_unknown, _encode, _get_counts, _unique @pytest.mark.parametrize( diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 3a88b4431fe86..df1aa28bc4dd2 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -2,43 +2,42 @@ # build_tools/azure/test_pytest_soft_dependency.sh on these # tests to make sure estimator_checks works without pytest. 
-import unittest import sys +import unittest import warnings import numpy as np import scipy.sparse as sp -import joblib +import joblib from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.cluster import MiniBatchKMeans from sklearn.datasets import make_multilabel_classification -from sklearn.utils import deprecated +from sklearn.decomposition import PCA +from sklearn.ensemble import ExtraTreesClassifier +from sklearn.exceptions import SkipTestWarning +from sklearn.linear_model import ( + LinearRegression, + LogisticRegression, + MultiTaskElasticNet, + SGDClassifier, +) +from sklearn.mixture import GaussianMixture +from sklearn.neighbors import KNeighborsRegressor +from sklearn.svm import SVC, NuSVC +from sklearn.utils import all_estimators, deprecated from sklearn.utils._testing import ( - raises, - ignore_warnings, MinimalClassifier, MinimalRegressor, MinimalTransformer, SkipTest, + ignore_warnings, + raises, ) - -from sklearn.utils.validation import check_is_fitted, check_X_y -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.linear_model import LinearRegression, SGDClassifier -from sklearn.mixture import GaussianMixture -from sklearn.cluster import MiniBatchKMeans -from sklearn.decomposition import PCA -from sklearn.linear_model import MultiTaskElasticNet, LogisticRegression -from sklearn.svm import SVC, NuSVC -from sklearn.neighbors import KNeighborsRegressor -from sklearn.utils.validation import check_array -from sklearn.utils import all_estimators -from sklearn.exceptions import SkipTestWarning -from sklearn.utils.metaestimators import available_if - from sklearn.utils.estimator_checks import ( _NotAnArray, _set_checking_parameters, + _yield_all_checks, check_class_weight_balanced_linear_classifier, check_classifier_data_not_an_array, check_classifiers_multilabel_output_format_decision_function, @@ -48,17 +47,18 @@ check_estimator, check_estimator_get_tags_default_keys, check_estimators_unfitted, + check_fit_check_is_fitted, check_fit_score_takes_y, + check_methods_sample_order_invariance, + check_methods_subset_invariance, check_no_attributes_set_in_init, + check_outlier_corruption, check_regressor_data_not_an_array, check_requires_y_none, - check_outlier_corruption, set_random_state, - check_fit_check_is_fitted, - check_methods_sample_order_invariance, - check_methods_subset_invariance, - _yield_all_checks, ) +from sklearn.utils.metaestimators import available_if +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y class CorrectNotFittedError(ValueError): diff --git a/sklearn/utils/tests/test_estimator_html_repr.py b/sklearn/utils/tests/test_estimator_html_repr.py index 91644819864eb..696a9eff7a911 100644 --- a/sklearn/utils/tests/test_estimator_html_repr.py +++ b/sklearn/utils/tests/test_estimator_html_repr.py @@ -1,37 +1,31 @@ -from contextlib import closing import html +from contextlib import closing from io import StringIO import pytest from sklearn import config_context -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier -from sklearn.impute import SimpleImputer -from sklearn.decomposition import PCA -from sklearn.decomposition import TruncatedSVD -from sklearn.pipeline import Pipeline -from sklearn.pipeline import FeatureUnion +from sklearn.cluster import AgglomerativeClustering, Birch from sklearn.compose import ColumnTransformer -from sklearn.ensemble import VotingClassifier +from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.ensemble import 
StackingClassifier, StackingRegressor, VotingClassifier from sklearn.feature_selection import SelectPercentile -from sklearn.cluster import Birch -from sklearn.cluster import AgglomerativeClustering -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.svm import LinearSVC -from sklearn.svm import LinearSVR -from sklearn.tree import DecisionTreeClassifier -from sklearn.multiclass import OneVsOneClassifier -from sklearn.ensemble import StackingClassifier -from sklearn.ensemble import StackingRegressor from sklearn.gaussian_process.kernels import ExpSineSquared +from sklearn.impute import SimpleImputer from sklearn.kernel_ridge import KernelRidge - +from sklearn.linear_model import LogisticRegression from sklearn.model_selection import RandomizedSearchCV -from sklearn.utils._estimator_html_repr import _write_label_html -from sklearn.utils._estimator_html_repr import _get_visual_block -from sklearn.utils._estimator_html_repr import estimator_html_repr +from sklearn.multiclass import OneVsOneClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.pipeline import FeatureUnion, Pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.svm import LinearSVC, LinearSVR +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils._estimator_html_repr import ( + _get_visual_block, + _write_label_html, + estimator_html_repr, +) @pytest.mark.parametrize("checked", [True, False]) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index ece7c180300a1..41df3f08d0574 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -5,35 +5,38 @@ # License: BSD 3 clause import numpy as np -from scipy import sparse -from scipy import linalg -from scipy import stats +import pytest +from scipy import linalg, sparse, stats from scipy.sparse.linalg import eigsh from scipy.special import expit -import pytest +from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix from sklearn.utils import gen_batches from sklearn.utils._arpack import _init_arpack_v0 -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import skip_if_32bit - -from sklearn.utils.extmath import density, _safe_accumulator_op -from sklearn.utils.extmath import randomized_svd, _randomized_eigsh -from sklearn.utils.extmath import row_norms -from sklearn.utils.extmath import weighted_mode -from sklearn.utils.extmath import cartesian -from sklearn.utils.extmath import log_logistic -from sklearn.utils.extmath import svd_flip -from sklearn.utils.extmath import _incremental_mean_and_var -from sklearn.utils.extmath import _deterministic_vector_sign_flip -from sklearn.utils.extmath import softmax -from sklearn.utils.extmath import stable_cumsum -from sklearn.utils.extmath import safe_sparse_dot -from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) +from sklearn.utils.extmath import ( + _deterministic_vector_sign_flip, + _incremental_mean_and_var, + _randomized_eigsh, + _safe_accumulator_op, + cartesian, 
+ density, + log_logistic, + randomized_svd, + row_norms, + safe_sparse_dot, + softmax, + stable_cumsum, + svd_flip, + weighted_mode, +) def test_density(): diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 3566897da5efc..635cb0f7b9842 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -10,9 +10,7 @@ import scipy.stats from sklearn.utils._testing import assert_array_equal - -from sklearn.utils.fixes import _object_dtype_isnan -from sklearn.utils.fixes import loguniform +from sklearn.utils.fixes import _object_dtype_isnan, loguniform @pytest.mark.parametrize("dtype, val", ([object, 1], [object, "a"], [float, 1])) diff --git a/sklearn/utils/tests/test_graph.py b/sklearn/utils/tests/test_graph.py index 78196fbb05fba..d64108a40d8ab 100644 --- a/sklearn/utils/tests/test_graph.py +++ b/sklearn/utils/tests/test_graph.py @@ -1,10 +1,10 @@ -import pytest import numpy as np +import pytest from scipy.sparse.csgraph import connected_components +from sklearn.metrics.pairwise import pairwise_distances from sklearn.neighbors import kneighbors_graph from sklearn.utils.graph import _fix_connected_components -from sklearn.metrics.pairwise import pairwise_distances def test_fix_connected_components(): diff --git a/sklearn/utils/tests/test_metaestimators.py b/sklearn/utils/tests/test_metaestimators.py index 2a75ab387df60..26215f20c3a33 100644 --- a/sklearn/utils/tests/test_metaestimators.py +++ b/sklearn/utils/tests/test_metaestimators.py @@ -1,11 +1,10 @@ -import numpy as np -import pytest +import pickle import warnings -import pickle +import numpy as np +import pytest -from sklearn.utils.metaestimators import if_delegate_has_method -from sklearn.utils.metaestimators import available_if +from sklearn.utils.metaestimators import available_if, if_delegate_has_method class Prefix: diff --git a/sklearn/utils/tests/test_mocking.py b/sklearn/utils/tests/test_mocking.py index a12c41256581a..3c15384bf4757 100644 --- a/sklearn/utils/tests/test_mocking.py +++ b/sklearn/utils/tests/test_mocking.py @@ -1,16 +1,12 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal from scipy import sparse -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose - from sklearn.datasets import load_iris -from sklearn.utils import check_array -from sklearn.utils import _safe_indexing -from sklearn.utils._testing import _convert_container - +from sklearn.utils import _safe_indexing, check_array from sklearn.utils._mocking import CheckingClassifier +from sklearn.utils._testing import _convert_container @pytest.fixture diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 996a25bc3a42b..a336e5ea3f412 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -1,32 +1,35 @@ -import numpy as np -import scipy.sparse as sp from itertools import product -import pytest - -from scipy.sparse import issparse -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import coo_matrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils.estimator_checks import _NotAnArray - -from sklearn.utils.multiclass import unique_labels -from sklearn.utils.multiclass import 
is_multilabel -from sklearn.utils.multiclass import type_of_target -from sklearn.utils.multiclass import class_distribution -from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.multiclass import _ovr_decision_function +import numpy as np +import pytest +import scipy.sparse as sp +from scipy.sparse import ( + coo_matrix, + csc_matrix, + csr_matrix, + dok_matrix, + issparse, + lil_matrix, +) -from sklearn.utils.metaestimators import _safe_split +from sklearn import datasets from sklearn.model_selection import ShuffleSplit from sklearn.svm import SVC -from sklearn import datasets - +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) +from sklearn.utils.estimator_checks import _NotAnArray +from sklearn.utils.metaestimators import _safe_split +from sklearn.utils.multiclass import ( + _ovr_decision_function, + check_classification_targets, + class_distribution, + is_multilabel, + type_of_target, + unique_labels, +) EXAMPLES = { "multilabel-indicator": [ diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index 4403c9a49275c..18730302124f9 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -3,9 +3,9 @@ # License: BSD 3 clause import numpy as np +from numpy.testing import assert_array_almost_equal, assert_array_equal + from sklearn.utils.murmurhash import murmurhash3_32 -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal def test_mmhash3_int(): diff --git a/sklearn/utils/tests/test_optimize.py b/sklearn/utils/tests/test_optimize.py index 82719635366b0..a8bcd1aebf793 100644 --- a/sklearn/utils/tests/test_optimize.py +++ b/sklearn/utils/tests/test_optimize.py @@ -1,9 +1,8 @@ import numpy as np - -from sklearn.utils.optimize import _newton_cg from scipy.optimize import fmin_ncg from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils.optimize import _newton_cg def test_newton_cg(): diff --git a/sklearn/utils/tests/test_parallel.py b/sklearn/utils/tests/test_parallel.py index dfecd7b464168..45c068170baa3 100644 --- a/sklearn/utils/tests/test_parallel.py +++ b/sklearn/utils/tests/test_parallel.py @@ -1,8 +1,7 @@ import pytest -from joblib import Parallel - from numpy.testing import assert_array_equal +from joblib import Parallel from sklearn._config import config_context, get_config from sklearn.utils.fixes import delayed diff --git a/sklearn/utils/tests/test_pprint.py b/sklearn/utils/tests/test_pprint.py index aa1e2e03841e9..c38f1b675d0d9 100644 --- a/sklearn/utils/tests/test_pprint.py +++ b/sklearn/utils/tests/test_pprint.py @@ -3,13 +3,12 @@ import numpy as np -from sklearn.utils._pprint import _EstimatorPrettyPrinter -from sklearn.linear_model import LogisticRegressionCV -from sklearn.pipeline import make_pipeline +from sklearn import config_context from sklearn.base import BaseEstimator, TransformerMixin from sklearn.feature_selection import SelectKBest, chi2 -from sklearn import config_context - +from sklearn.linear_model import LogisticRegressionCV +from sklearn.pipeline import make_pipeline +from sklearn.utils._pprint import _EstimatorPrettyPrinter # Ignore flake8 (lots of line too long issues) # flake8: noqa diff --git a/sklearn/utils/tests/test_random.py b/sklearn/utils/tests/test_random.py index 320ebe8b1ae65..833d27edf05ca 100644 --- a/sklearn/utils/tests/test_random.py +++ b/sklearn/utils/tests/test_random.py @@ -1,11 +1,11 @@ import numpy 
as np import pytest import scipy.sparse as sp -from scipy.special import comb from numpy.testing import assert_array_almost_equal +from scipy.special import comb -from sklearn.utils.random import _random_choice_csc, sample_without_replacement from sklearn.utils._random import _our_rand_r_py +from sklearn.utils.random import _random_choice_csc, sample_without_replacement ############################################################################### diff --git a/sklearn/utils/tests/test_readonly_wrapper.py b/sklearn/utils/tests/test_readonly_wrapper.py index 38163cc2461ce..f7c0077ce3b87 100644 --- a/sklearn/utils/tests/test_readonly_wrapper.py +++ b/sklearn/utils/tests/test_readonly_wrapper.py @@ -1,5 +1,4 @@ import numpy as np - import pytest from sklearn.utils._readonly_array_wrapper import ReadonlyArrayWrapper, _test_sum diff --git a/sklearn/utils/tests/test_seq_dataset.py b/sklearn/utils/tests/test_seq_dataset.py index 5c876fe62d74b..18f1be208b3f1 100644 --- a/sklearn/utils/tests/test_seq_dataset.py +++ b/sklearn/utils/tests/test_seq_dataset.py @@ -7,14 +7,14 @@ import pytest import scipy.sparse as sp from numpy.testing import assert_array_equal + +from sklearn.datasets import load_iris from sklearn.utils._seq_dataset import ( ArrayDataset32, ArrayDataset64, CSRDataset32, CSRDataset64, ) - -from sklearn.datasets import load_iris from sklearn.utils._testing import assert_allclose iris = load_iris() diff --git a/sklearn/utils/tests/test_shortest_path.py b/sklearn/utils/tests/test_shortest_path.py index 4e2618d99d54a..416187ff35fd3 100644 --- a/sklearn/utils/tests/test_shortest_path.py +++ b/sklearn/utils/tests/test_shortest_path.py @@ -3,6 +3,7 @@ import numpy as np import pytest from numpy.testing import assert_array_almost_equal + from sklearn.utils.graph import graph_shortest_path, single_source_shortest_path_length diff --git a/sklearn/utils/tests/test_show_versions.py b/sklearn/utils/tests/test_show_versions.py index e6590bfde15f5..bd166dfd8e522 100644 --- a/sklearn/utils/tests/test_show_versions.py +++ b/sklearn/utils/tests/test_show_versions.py @@ -1,8 +1,6 @@ -from sklearn.utils.fixes import threadpool_info -from sklearn.utils._show_versions import _get_sys_info -from sklearn.utils._show_versions import _get_deps_info -from sklearn.utils._show_versions import show_versions +from sklearn.utils._show_versions import _get_deps_info, _get_sys_info, show_versions from sklearn.utils._testing import ignore_warnings +from sklearn.utils.fixes import threadpool_info def test_get_sys_info(): diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index 6a86be2f0445f..f3bcaf56bb561 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -1,30 +1,29 @@ -import pytest import numpy as np +import pytest import scipy.sparse as sp - -from scipy import linalg -from numpy.testing import assert_array_almost_equal, assert_array_equal from numpy.random import RandomState +from numpy.testing import assert_array_almost_equal, assert_array_equal +from scipy import linalg from sklearn.datasets import make_classification +from sklearn.utils._testing import assert_allclose from sklearn.utils.sparsefuncs import ( - mean_variance_axis, + count_nonzero, + csc_median_axis_0, incr_mean_variance_axis, inplace_column_scale, inplace_row_scale, - inplace_swap_row, inplace_swap_column, + inplace_swap_row, + mean_variance_axis, min_max_axis, - count_nonzero, - csc_median_axis_0, ) from sklearn.utils.sparsefuncs_fast import ( 
assign_rows_csr, + csr_row_norms, inplace_csr_row_normalize_l1, inplace_csr_row_normalize_l2, - csr_row_norms, ) -from sklearn.utils._testing import assert_allclose def test_mean_variance_axis0(): diff --git a/sklearn/utils/tests/test_tags.py b/sklearn/utils/tests/test_tags.py index f96a4947164c3..b777b74921824 100644 --- a/sklearn/utils/tests/test_tags.py +++ b/sklearn/utils/tests/test_tags.py @@ -1,10 +1,7 @@ import pytest from sklearn.base import BaseEstimator -from sklearn.utils._tags import ( - _DEFAULT_TAGS, - _safe_tags, -) +from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags class NoTagsEstimator: diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index fca7a07b14c19..de46d5a847023 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -1,38 +1,35 @@ -import warnings -import unittest -import sys -import os import atexit +import os +import sys +import unittest +import warnings import numpy as np - -from scipy import sparse - import pytest +from scipy import sparse -from sklearn.utils.deprecation import deprecated -from sklearn.utils.metaestimators import available_if, if_delegate_has_method +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.tree import DecisionTreeClassifier from sklearn.utils._readonly_array_wrapper import _test_sum from sklearn.utils._testing import ( - assert_raises, - assert_warns, + TempMemmap, + _convert_container, + _delete_folder, + assert_allclose, + assert_allclose_dense_sparse, assert_no_warnings, - set_random_state, assert_raise_message, - ignore_warnings, - check_docstring_parameters, - assert_allclose_dense_sparse, + assert_raises, assert_raises_regex, - TempMemmap, + assert_warns, + check_docstring_parameters, create_memmap_backed_data, - _delete_folder, - _convert_container, + ignore_warnings, raises, - assert_allclose, + set_random_state, ) - -from sklearn.tree import DecisionTreeClassifier -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.utils.deprecation import deprecated +from sklearn.utils.metaestimators import available_if, if_delegate_has_method def test_set_random_state(): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 82be82afa5eed..21e1247481302 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -1,37 +1,40 @@ -from copy import copy -from itertools import chain -import warnings import string import timeit +import warnings +from copy import copy +from itertools import chain -import pytest import numpy as np +import pytest import scipy.sparse as sp +from sklearn import config_context +from sklearn.utils import ( + _approximate_mode, + _determine_key_type, + _get_column_indices, + _message_with_time, + _print_elapsed_time, + _safe_indexing, + _to_object_array, + check_random_state, + column_or_1d, + deprecated, + gen_batches, + gen_even_slices, + get_chunk_n_rows, + is_scalar_nan, + resample, + safe_mask, + shuffle, +) +from sklearn.utils._mocking import MockDataFrame from sklearn.utils._testing import ( - assert_array_equal, + _convert_container, assert_allclose_dense_sparse, + assert_array_equal, assert_no_warnings, - _convert_container, ) -from sklearn.utils import check_random_state -from sklearn.utils import _determine_key_type -from sklearn.utils import deprecated -from sklearn.utils import gen_batches -from sklearn.utils import _get_column_indices -from sklearn.utils import resample -from sklearn.utils import 
safe_mask -from sklearn.utils import column_or_1d -from sklearn.utils import _safe_indexing -from sklearn.utils import shuffle -from sklearn.utils import gen_even_slices -from sklearn.utils import _message_with_time, _print_elapsed_time -from sklearn.utils import get_chunk_n_rows -from sklearn.utils import is_scalar_nan -from sklearn.utils import _to_object_array -from sklearn.utils import _approximate_mode -from sklearn.utils._mocking import MockDataFrame -from sklearn import config_context # toy array X_toy = np.arange(9).reshape((3, 3)) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index e33d14fa3b07e..9672bf4f4dd01 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -1,64 +1,67 @@ """Tests for input validation functions""" import numbers -import warnings import re - -from tempfile import NamedTemporaryFile +import warnings from itertools import product from operator import itemgetter +from tempfile import NamedTemporaryFile -import pytest -from pytest import importorskip import numpy as np +import pytest import scipy.sparse as sp +from pytest import importorskip -from sklearn.utils._testing import assert_no_warnings -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import SkipTest -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import _convert_container -from sklearn.utils import as_float_array, check_array, check_symmetric -from sklearn.utils import check_X_y -from sklearn.utils import deprecated -from sklearn.utils._mocking import MockDataFrame -from sklearn.utils.fixes import parse_version -from sklearn.utils.estimator_checks import _NotAnArray -from sklearn.random_projection import _sparse_random_matrix +import sklearn +from sklearn.base import BaseEstimator +from sklearn.datasets import make_blobs +from sklearn.ensemble import RandomForestRegressor +from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning from sklearn.linear_model import ARDRegression from sklearn.neighbors import KNeighborsClassifier -from sklearn.ensemble import RandomForestRegressor +from sklearn.random_projection import _sparse_random_matrix from sklearn.svm import SVR -from sklearn.datasets import make_blobs -from sklearn.utils import _safe_indexing +from sklearn.utils import ( + _safe_indexing, + as_float_array, + check_array, + check_symmetric, + check_X_y, + deprecated, +) +from sklearn.utils._mocking import MockDataFrame +from sklearn.utils._testing import ( + SkipTest, + TempMemmap, + _convert_container, + assert_allclose, + assert_allclose_dense_sparse, + assert_array_equal, + assert_no_warnings, + ignore_warnings, +) +from sklearn.utils.estimator_checks import _NotAnArray +from sklearn.utils.fixes import parse_version from sklearn.utils.validation import ( - has_fit_parameter, - check_is_fitted, - check_consistent_length, - assert_all_finite, - check_memory, - check_non_negative, - _num_samples, - check_scalar, + FLOAT_DTYPES, + _allclose_dense_sparse, + _check_feature_names_in, + _check_fit_params, _check_psd_eigenvalues, + _check_sample_weight, _check_y, _deprecate_positional_args, - _check_sample_weight, - _allclose_dense_sparse, - _num_features, - FLOAT_DTYPES, _get_feature_names, - _check_feature_names_in, - _check_fit_params, + _num_features, + _num_samples, + assert_all_finite, + check_consistent_length, + 
check_is_fitted, + check_memory, + check_non_negative, + check_scalar, + has_fit_parameter, ) -from sklearn.base import BaseEstimator -import sklearn - -from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning - -from sklearn.utils._testing import TempMemmap # TODO: Remove np.matrix usage in 1.2 diff --git a/sklearn/utils/tests/test_weight_vector.py b/sklearn/utils/tests/test_weight_vector.py index 627d46d1fda06..e9c9c286af62e 100644 --- a/sklearn/utils/tests/test_weight_vector.py +++ b/sklearn/utils/tests/test_weight_vector.py @@ -1,9 +1,7 @@ import numpy as np import pytest -from sklearn.utils._weight_vector import ( - WeightVector32, - WeightVector64, -) + +from sklearn.utils._weight_vector import WeightVector32, WeightVector64 @pytest.mark.parametrize( diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 879ddac3e6f42..f5383905941e8 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -9,26 +9,24 @@ # Sylvain Marie # License: BSD 3 clause -from functools import wraps -import warnings import numbers import operator +import warnings +from contextlib import suppress +from functools import wraps +from inspect import Parameter, isclass, signature import numpy as np import scipy.sparse as sp -from inspect import signature, isclass, Parameter # mypy error: Module 'numpy.core.numeric' has no attribute 'ComplexWarning' from numpy.core.numeric import ComplexWarning # type: ignore -import joblib -from contextlib import suppress +import joblib -from .fixes import _object_dtype_isnan from .. import get_config as _get_config -from ..exceptions import PositiveSpectrumWarning -from ..exceptions import NotFittedError -from ..exceptions import DataConversionWarning +from ..exceptions import DataConversionWarning, NotFittedError, PositiveSpectrumWarning +from .fixes import _object_dtype_isnan FLOAT_DTYPES = (np.float64, np.float32, np.float16) @@ -582,9 +580,9 @@ def _pandas_dtype_needs_early_conversion(pd_dtype): # Check these early for pandas versions without extension dtypes from pandas.api.types import ( is_bool_dtype, - is_sparse, is_float_dtype, is_integer_dtype, + is_sparse, ) if is_bool_dtype(pd_dtype): From e6cd604d5db57f302ddac680a11cd215b3ccad6f Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Fri, 13 May 2022 14:23:59 +0200 Subject: [PATCH 03/10] add isort to linting CI in azure pipelines --- azure-pipelines.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 020d007acc651..9aa7303584b84 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -39,6 +39,9 @@ jobs: - bash: | black --check --diff . displayName: Run black + - bash: | + isort --check --diff . 
+ displayName: Run isort - bash: | ./build_tools/circle/linting.sh displayName: Run linting From 3b9e26bc0322e2dbb4a139ea5d4fce2e206780e5 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:03:32 +0200 Subject: [PATCH 04/10] intentionally bad import order --- sklearn/model_selection/_split.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 08ca4de3c7457..98e4c6d5df6b4 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -11,6 +11,12 @@ # Rodion Martynov # License: BSD 3 clause +from ..base import _pprint +from ..utils import _approximate_mode, _safe_indexing, check_random_state, indexable +from ..utils.multiclass import type_of_target +from ..utils.validation import _num_samples, check_array, column_or_1d + + import numbers import warnings from abc import ABCMeta, abstractmethod @@ -23,11 +29,6 @@ import numpy as np from scipy.special import comb -from ..base import _pprint -from ..utils import _approximate_mode, _safe_indexing, check_random_state, indexable -from ..utils.multiclass import type_of_target -from ..utils.validation import _num_samples, check_array, column_or_1d - __all__ = [ "BaseCrossValidator", "KFold", From 570673f1f6da989c998e3734c2edcc609ae0384d Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:10:25 +0200 Subject: [PATCH 05/10] trigger CI From 6eb4a85b8ed0e46dab6aa87a9e89956935b7c707 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:24:14 +0200 Subject: [PATCH 06/10] Revert "intentionally bad import order" This reverts commit 3b9e26bc0322e2dbb4a139ea5d4fce2e206780e5. --- sklearn/model_selection/_split.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 98e4c6d5df6b4..08ca4de3c7457 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -11,12 +11,6 @@ # Rodion Martynov # License: BSD 3 clause -from ..base import _pprint -from ..utils import _approximate_mode, _safe_indexing, check_random_state, indexable -from ..utils.multiclass import type_of_target -from ..utils.validation import _num_samples, check_array, column_or_1d - - import numbers import warnings from abc import ABCMeta, abstractmethod @@ -29,6 +23,11 @@ import numpy as np from scipy.special import comb +from ..base import _pprint +from ..utils import _approximate_mode, _safe_indexing, check_random_state, indexable +from ..utils.multiclass import type_of_target +from ..utils.validation import _num_samples, check_array, column_or_1d + __all__ = [ "BaseCrossValidator", "KFold", From 7684a4bbe7bdf712f80c202b4c02698c69e84cfe Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:24:30 +0200 Subject: [PATCH 07/10] Revert "apply isort to the repo" This reverts commit 6f968645df7844cf73d1cdf81818b68a358ce95e. 
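The four commits above wire the new import-order check into CI and exercise it end to end: PATCH 03 adds an isort --check --diff . step to the Azure linting job, PATCH 04 deliberately moves the relative imports in sklearn/model_selection/_split.py above the stdlib block, PATCH 05 is an empty commit that re-triggers CI, and PATCH 06 reverts the bad ordering once the failure is confirmed. The snippet below is a minimal sketch of running the same check locally through isort's Python API rather than the CLI; it assumes isort 5, whose public API exposes isort.check_code and isort.code (both accept configuration keywords such as profile), and the sample module text is a hypothetical reduction of the ordering error introduced in PATCH 04, not the real _split.py header.

    import isort

    # A deliberately mis-ordered module header: a sklearn import placed
    # above the standard-library imports, mimicking PATCH 04 (the text
    # itself is a made-up reduction, not copied from _split.py).
    bad = (
        "from sklearn.utils import check_random_state\n"
        "\n"
        "import numbers\n"
        "import warnings\n"
    )

    # check_code returns True only when the imports already satisfy the
    # given configuration, so the header above is flagged as unsorted.
    assert not isort.check_code(bad, profile="black")

    # isort.code returns the corrected source: stdlib imports first,
    # then the sklearn import in a later section, which is exactly the
    # ordering PATCH 06 restores.
    print(isort.code(bad, profile="black"))

With isort installed, this mirrors locally what the CI step checks repo-wide, which is why the intentionally bad commit turns the lint job red until it is reverted.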
--- .github/scripts/label_title_regex.py | 5 +- asv_benchmarks/benchmarks/cluster.py | 2 +- asv_benchmarks/benchmarks/common.py | 8 +- asv_benchmarks/benchmarks/datasets.py | 15 +- asv_benchmarks/benchmarks/decomposition.py | 4 +- asv_benchmarks/benchmarks/ensemble.py | 2 +- asv_benchmarks/benchmarks/linear_model.py | 4 +- benchmarks/bench_20newsgroups.py | 19 +- benchmarks/bench_covertype.py | 18 +- benchmarks/bench_feature_expansions.py | 4 +- benchmarks/bench_glm.py | 3 +- benchmarks/bench_glmnet.py | 10 +- benchmarks/bench_hist_gradient_boosting.py | 15 +- .../bench_hist_gradient_boosting_adult.py | 7 +- ...hist_gradient_boosting_categorical_only.py | 3 +- ...bench_hist_gradient_boosting_higgsboson.py | 10 +- .../bench_hist_gradient_boosting_threading.py | 17 +- benchmarks/bench_isolation_forest.py | 7 +- benchmarks/bench_isotonic.py | 10 +- ...kernel_pca_solvers_time_vs_n_components.py | 7 +- ...ch_kernel_pca_solvers_time_vs_n_samples.py | 7 +- benchmarks/bench_lasso.py | 4 +- benchmarks/bench_lof.py | 8 +- benchmarks/bench_mnist.py | 18 +- benchmarks/bench_multilabel_metrics.py | 11 +- benchmarks/bench_online_ocsvm.py | 19 +- benchmarks/bench_plot_fastkmeans.py | 2 +- benchmarks/bench_plot_incremental_pca.py | 8 +- benchmarks/bench_plot_lasso_path.py | 7 +- benchmarks/bench_plot_neighbors.py | 4 +- benchmarks/bench_plot_nmf.py | 19 +- benchmarks/bench_plot_omp_lars.py | 2 +- benchmarks/bench_plot_parallel_pairwise.py | 3 +- ...ch_plot_polynomial_kernel_approximation.py | 16 +- benchmarks/bench_plot_randomized_svd.py | 27 +- benchmarks/bench_plot_svd.py | 9 +- benchmarks/bench_plot_ward.py | 2 +- benchmarks/bench_random_projections.py | 6 +- benchmarks/bench_rcv1_logreg_convergence.py | 9 +- benchmarks/bench_saga.py | 10 +- .../bench_sample_without_replacement.py | 6 +- benchmarks/bench_sgd_regression.py | 11 +- benchmarks/bench_sparsify.py | 3 +- benchmarks/bench_text_vectorizers.py | 4 +- benchmarks/bench_tree.py | 5 +- benchmarks/bench_tsne_mnist.py | 9 +- benchmarks/plot_tsne_mnist.py | 7 +- build_tools/circle/list_versions.py | 1 + build_tools/generate_authors_table.py | 7 +- build_tools/github/check_wheels.py | 5 +- build_tools/github/vendor.py | 1 + doc/conf.py | 13 +- doc/conftest.py | 12 +- doc/sphinxext/allow_nan_estimators.py | 13 +- doc/sphinxext/doi_role.py | 1 + doc/sphinxext/github_link.py | 4 +- .../machine_learning_map/parse_path.py | 192 +- .../machine_learning_map/pyparsing.py | 3437 +++++++---------- .../data/languages/fetch_data.py | 67 +- .../data/movie_reviews/fetch_data.py | 10 +- .../exercise_01_language_train_model.py | 35 +- .../skeletons/exercise_02_sentiment.py | 25 +- .../exercise_01_language_train_model.py | 42 +- .../solutions/exercise_02_sentiment.py | 48 +- .../plot_cyclical_feature_engineering.py | 24 +- .../applications/plot_digits_denoising.py | 3 +- .../applications/plot_face_recognition.py | 10 +- .../plot_model_complexity_influence.py | 10 +- .../plot_out_of_core_classification.py | 14 +- .../plot_outlier_detection_wine.py | 7 +- .../applications/plot_prediction_latency.py | 13 +- .../plot_species_distribution_modeling.py | 6 +- examples/applications/plot_stock_market.py | 1 - .../plot_tomography_l1_reconstruction.py | 9 +- .../plot_topics_extraction_with_nmf_lda.py | 5 +- examples/applications/svm_gui.py | 7 +- .../wikipedia_principal_eigenvector.py | 6 +- .../bicluster/plot_bicluster_newsgroups.py | 5 +- .../bicluster/plot_spectral_biclustering.py | 3 +- .../bicluster/plot_spectral_coclustering.py | 2 +- 
examples/calibration/plot_calibration.py | 3 +- .../calibration/plot_calibration_curve.py | 6 +- .../plot_calibration_multiclass.py | 1 - .../plot_classification_probability.py | 8 +- .../plot_classifier_comparison.py | 21 +- .../plot_digits_classification.py | 2 +- examples/classification/plot_lda.py | 5 +- examples/classification/plot_lda_qda.py | 8 +- .../plot_adjusted_for_chance_measures.py | 6 +- examples/cluster/plot_affinity_propagation.py | 5 +- .../cluster/plot_agglomerative_clustering.py | 1 - .../cluster/plot_agglomerative_dendrogram.py | 4 +- .../cluster/plot_birch_vs_minibatchkmeans.py | 8 +- examples/cluster/plot_bisect_kmeans.py | 3 +- examples/cluster/plot_cluster_comparison.py | 4 +- examples/cluster/plot_cluster_iris.py | 4 +- examples/cluster/plot_coin_segmentation.py | 5 +- examples/cluster/plot_color_quantization.py | 8 +- examples/cluster/plot_dbscan.py | 3 +- examples/cluster/plot_digits_agglomeration.py | 4 +- examples/cluster/plot_digits_linkage.py | 2 +- examples/cluster/plot_face_compress.py | 3 +- ...e_agglomeration_vs_univariate_selection.py | 9 +- examples/cluster/plot_inductive_clustering.py | 2 +- examples/cluster/plot_kmeans_assumptions.py | 2 +- examples/cluster/plot_kmeans_digits.py | 2 - examples/cluster/plot_kmeans_plusplus.py | 3 +- .../plot_kmeans_silhouette_analysis.py | 10 +- .../plot_kmeans_stability_low_dim_dense.py | 10 +- examples/cluster/plot_linkage_comparison.py | 4 +- examples/cluster/plot_mean_shift.py | 4 +- examples/cluster/plot_mini_batch_kmeans.py | 2 - examples/cluster/plot_optics.py | 3 +- examples/cluster/plot_segmentation_toy.py | 3 +- .../plot_ward_structured_vs_unstructured.py | 10 +- examples/compose/plot_column_transformer.py | 4 +- .../plot_column_transformer_mixed_types.py | 6 +- examples/compose/plot_compare_reduction.py | 10 +- examples/compose/plot_digits_pipe.py | 4 +- examples/compose/plot_feature_union.py | 6 +- examples/compose/plot_transformed_target.py | 6 +- .../covariance/plot_covariance_estimation.py | 5 +- examples/covariance/plot_lw_vs_oas.py | 6 +- .../covariance/plot_mahalanobis_distances.py | 1 - .../plot_robust_vs_empirical_covariance.py | 4 +- examples/covariance/plot_sparse_cov.py | 1 - .../plot_compare_cross_decomposition.py | 5 +- .../cross_decomposition/plot_pcr_vs_pls.py | 11 +- examples/datasets/plot_digits_last_image.py | 4 +- examples/datasets/plot_random_dataset.py | 4 +- .../plot_random_multilabel_dataset.py | 2 +- .../decomposition/plot_beta_divergence.py | 3 +- .../decomposition/plot_faces_decomposition.py | 5 +- .../plot_ica_blind_source_separation.py | 4 +- .../decomposition/plot_image_denoising.py | 1 + .../decomposition/plot_incremental_pca.py | 2 +- examples/decomposition/plot_pca_3d.py | 5 +- examples/decomposition/plot_pca_iris.py | 6 +- .../plot_pca_vs_fa_model_selection.py | 6 +- examples/decomposition/plot_sparse_coding.py | 2 +- examples/decomposition/plot_varimax_fa.py | 4 +- .../ensemble/plot_adaboost_hastie_10_2.py | 1 - examples/ensemble/plot_adaboost_multiclass.py | 1 + examples/ensemble/plot_adaboost_regression.py | 6 +- examples/ensemble/plot_adaboost_twoclass.py | 7 +- examples/ensemble/plot_bias_variance.py | 2 +- examples/ensemble/plot_ensemble_oob.py | 3 +- .../ensemble/plot_feature_transformation.py | 6 +- examples/ensemble/plot_forest_importances.py | 1 - .../ensemble/plot_forest_importances_faces.py | 1 - examples/ensemble/plot_forest_iris.py | 6 +- .../plot_gradient_boosting_categorical.py | 9 +- .../plot_gradient_boosting_early_stopping.py | 5 +- 
.../ensemble/plot_gradient_boosting_oob.py | 8 +- .../plot_gradient_boosting_quantile.py | 8 +- .../plot_gradient_boosting_regression.py | 1 - .../plot_gradient_boosting_regularization.py | 6 +- examples/ensemble/plot_isolation_forest.py | 3 +- .../ensemble/plot_monotonic_constraints.py | 6 +- .../ensemble/plot_random_forest_embedding.py | 4 +- ...ot_random_forest_regression_multioutput.py | 4 +- examples/ensemble/plot_stack_predictors.py | 7 +- .../ensemble/plot_voting_decision_regions.py | 6 +- examples/ensemble/plot_voting_probas.py | 5 +- examples/ensemble/plot_voting_regressor.py | 8 +- examples/exercises/plot_cv_digits.py | 3 +- .../plot_digits_classification_exercise.py | 2 +- examples/exercises/plot_iris_exercise.py | 3 +- .../feature_selection/plot_f_test_vs_mi.py | 3 +- .../plot_feature_selection.py | 1 - examples/feature_selection/plot_rfe_digits.py | 5 +- .../plot_rfe_with_cross_validation.py | 7 +- .../plot_select_from_model_diabetes.py | 4 +- .../gaussian_process/plot_compare_gpr_krr.py | 1 - examples/gaussian_process/plot_gpc.py | 4 +- examples/gaussian_process/plot_gpc_iris.py | 3 +- .../plot_gpc_isoprobability.py | 6 +- examples/gaussian_process/plot_gpc_xor.py | 3 +- examples/gaussian_process/plot_gpr_co2.py | 1 - .../plot_gpr_on_structured_data.py | 9 +- ...t_iterative_imputer_variants_comparison.py | 14 +- examples/impute/plot_missing_values.py | 8 +- ...linear_model_coefficient_interpretation.py | 15 +- .../inspection/plot_partial_dependence.py | 10 +- .../inspection/plot_permutation_importance.py | 3 +- ...t_permutation_importance_multicollinear.py | 2 +- .../plot_scalable_poly_kernels.py | 3 +- examples/linear_model/plot_ard.py | 3 +- ...puted_gram_matrix_with_weighted_samples.py | 1 - examples/linear_model/plot_huber_vs_ridge.py | 2 +- examples/linear_model/plot_iris_logistic.py | 3 +- .../linear_model/plot_lasso_and_elasticnet.py | 2 +- .../plot_lasso_coordinate_descent_path.py | 6 +- .../plot_lasso_dense_vs_sparse_data.py | 5 +- examples/linear_model/plot_lasso_lars.py | 5 +- examples/linear_model/plot_lasso_lars_ic.py | 5 +- .../plot_lasso_model_selection.py | 3 +- examples/linear_model/plot_logistic.py | 6 +- .../plot_logistic_l1_l2_sparsity.py | 4 +- .../linear_model/plot_logistic_multinomial.py | 5 +- .../plot_multi_task_lasso_support.py | 2 +- examples/linear_model/plot_nnls.py | 3 +- examples/linear_model/plot_ols.py | 1 - examples/linear_model/plot_ols_3d.py | 3 +- .../linear_model/plot_ols_ridge_variance.py | 2 +- examples/linear_model/plot_omp.py | 4 +- ...plot_poisson_regression_non_normal_loss.py | 28 +- .../plot_polynomial_interpolation.py | 5 +- .../linear_model/plot_quantile_regression.py | 3 +- examples/linear_model/plot_ransac.py | 3 +- examples/linear_model/plot_ridge_path.py | 3 +- examples/linear_model/plot_robust_fit.py | 8 +- examples/linear_model/plot_sgd_comparison.py | 13 +- .../linear_model/plot_sgd_early_stopping.py | 10 +- examples/linear_model/plot_sgd_iris.py | 5 +- .../linear_model/plot_sgd_loss_functions.py | 2 +- examples/linear_model/plot_sgd_penalties.py | 2 +- .../plot_sgd_separating_hyperplane.py | 5 +- .../linear_model/plot_sgd_weighted_samples.py | 3 +- .../linear_model/plot_sgdocsvm_vs_ocsvm.py | 9 +- ...sparse_logistic_regression_20newsgroups.py | 2 +- .../plot_sparse_logistic_regression_mnist.py | 1 - examples/linear_model/plot_theilsen.py | 7 +- ...lot_tweedie_regression_insurance_claims.py | 28 +- examples/manifold/plot_compare_methods.py | 6 +- examples/manifold/plot_lle_digits.py | 5 +- 
 examples/manifold/plot_manifold_sphere.py | 12 +-
 examples/manifold/plot_mds.py | 3 +-
 examples/manifold/plot_swissroll.py | 2 +-
 examples/manifold/plot_t_sne_perplexity.py | 9 +-
 .../miscellaneous/plot_anomaly_comparison.py | 8 +-
 .../plot_changed_only_pprint_parameter.py | 3 +-
 .../plot_display_object_visualization.py | 13 +-
 .../miscellaneous/plot_isotonic_regression.py | 4 +-
 .../plot_johnson_lindenstrauss_bound.py | 13 +-
 .../plot_kernel_approximation.py | 7 +-
 .../plot_kernel_ridge_regression.py | 2 +-
 examples/miscellaneous/plot_multilabel.py | 6 +-
 .../plot_multioutput_face_completion.py | 8 +-
 .../plot_outlier_detection_bench.py | 9 +-
 ...ot_partial_dependence_visualization_api.py | 8 +-
 .../miscellaneous/plot_pipeline_display.py | 26 +-
 .../plot_roc_curve_visualization_api.py | 5 +-
 examples/mixture/plot_concentration_prior.py | 4 +-
 examples/mixture/plot_gmm.py | 4 +-
 examples/mixture/plot_gmm_covariances.py | 1 +
 examples/mixture/plot_gmm_init.py | 6 +-
 examples/mixture/plot_gmm_pdf.py | 3 +-
 examples/mixture/plot_gmm_selection.py | 6 +-
 examples/mixture/plot_gmm_sin.py | 4 +-
 .../grid_search_text_feature_extraction.py | 5 +-
 .../model_selection/plot_confusion_matrix.py | 6 +-
 examples/model_selection/plot_cv_indices.py | 15 +-
 examples/model_selection/plot_cv_predict.py | 6 +-
 .../plot_grid_search_digits.py | 3 +-
 .../plot_grid_search_refit_callable.py | 2 +-
 .../model_selection/plot_grid_search_stats.py | 1 -
 .../model_selection/plot_learning_curve.py | 8 +-
 .../plot_multi_metric_evaluation.py | 3 +-
 .../plot_nested_cross_validation_iris.py | 7 +-
 ...ot_permutation_tests_for_classification.py | 3 +-
 .../model_selection/plot_precision_recall.py | 7 +-
 .../model_selection/plot_randomized_search.py | 8 +-
 examples/model_selection/plot_roc.py | 12 +-
 examples/model_selection/plot_roc_crossval.py | 3 +-
 .../plot_successive_halving_heatmap.py | 6 +-
 .../plot_successive_halving_iterations.py | 9 +-
 .../plot_train_error_vs_test_error.py | 1 -
 .../plot_underfitting_overfitting.py | 7 +-
 .../model_selection/plot_validation_curve.py | 2 +-
 .../plot_classifier_chain_yeast.py | 9 +-
 .../approximate_nearest_neighbors.py | 11 +-
 .../plot_caching_nearest_neighbors.py | 5 +-
 examples/neighbors/plot_classification.py | 3 +-
 .../neighbors/plot_digits_kde_sampling.py | 4 +-
 examples/neighbors/plot_kde_1d.py | 4 +-
 .../neighbors/plot_lof_novelty_detection.py | 3 +-
 .../neighbors/plot_lof_outlier_detection.py | 3 +-
 examples/neighbors/plot_nca_classification.py | 6 +-
 examples/neighbors/plot_nca_dim_reduction.py | 5 +-
 examples/neighbors/plot_nca_illustration.py | 7 +-
 examples/neighbors/plot_nearest_centroid.py | 5 +-
 examples/neighbors/plot_regression.py | 4 +-
 examples/neighbors/plot_species_kde.py | 3 +-
 examples/neural_networks/plot_mlp_alpha.py | 5 +-
 .../plot_mlp_training_curves.py | 4 +-
 .../neural_networks/plot_mnist_filters.py | 4 +-
 .../plot_rbm_logistic_classification.py | 4 +-
 examples/preprocessing/plot_all_scaling.py | 24 +-
 examples/preprocessing/plot_discretization.py | 2 +-
 .../plot_discretization_classification.py | 15 +-
 .../plot_discretization_strategies.py | 4 +-
 .../preprocessing/plot_map_data_to_normal.py | 6 +-
 .../preprocessing/plot_scaling_importance.py | 8 +-
 .../plot_release_highlights_0_22_0.py | 25 +-
 .../plot_release_highlights_0_23_0.py | 27 +-
 .../plot_release_highlights_0_24_0.py | 21 +-
 .../plot_release_highlights_1_0_0.py | 7 +-
 .../plot_release_highlights_1_1_0.py | 23 +-
 .../plot_label_propagation_digits.py | 6 +-
 ...abel_propagation_digits_active_learning.py | 4 +-
 .../plot_label_propagation_structure.py | 1 -
 .../plot_self_training_varying_threshold.py | 7 +-
 .../plot_semi_supervised_newsgroups.py | 10 +-
 .../plot_semi_supervised_versus_svm_iris.py | 7 +-
 examples/svm/plot_custom_kernel.py | 5 +-
 examples/svm/plot_iris_svc.py | 4 +-
 .../svm/plot_linearsvc_support_vectors.py | 5 +-
 examples/svm/plot_oneclass.py | 5 +-
 examples/svm/plot_rbf_parameters.py | 3 +-
 examples/svm/plot_separating_hyperplane.py | 2 +-
 .../plot_separating_hyperplane_unbalanced.py | 1 -
 examples/svm/plot_svm_anova.py | 5 +-
 examples/svm/plot_svm_kernels.py | 4 +-
 examples/svm/plot_svm_margin.py | 3 +-
 examples/svm/plot_svm_nonlinear.py | 3 +-
 examples/svm/plot_svm_regression.py | 3 +-
 examples/svm/plot_svm_scale_c.py | 7 +-
 examples/svm/plot_svm_tie_breaking.py | 5 +-
 examples/svm/plot_weighted_samples.py | 3 +-
 ...ot_document_classification_20newsgroups.py | 23 +-
 examples/text/plot_document_clustering.py | 24 +-
 .../text/plot_hashing_vs_dict_vectorizer.py | 2 +-
 examples/tree/plot_cost_complexity_pruning.py | 3 +-
 examples/tree/plot_iris_dtc.py | 6 +-
 examples/tree/plot_tree_regression.py | 4 +-
 .../tree/plot_tree_regression_multioutput.py | 3 +-
 examples/tree/plot_unveil_tree_structure.py | 4 +-
 maint_tools/check_pxd_in_installation.py | 5 +-
 maint_tools/sort_whats_new.py | 2 +-
 maint_tools/update_tracking_issue.py | 4 +-
 setup.py | 18 +-
 sklearn/__init__.py | 7 +-
 sklearn/_build_utils/__init__.py | 11 +-
 sklearn/_build_utils/openmp_helpers.py | 3 +-
 sklearn/_build_utils/pre_build_helpers.py | 8 +-
 sklearn/_config.py | 2 +-
 sklearn/_isotonic.pyx | 3 +-
 sklearn/_loss/__init__.py | 11 +-
 sklearn/_loss/glm_distribution.py | 3 +-
 sklearn/_loss/link.py | 1 -
 sklearn/_loss/loss.py | 26 +-
 sklearn/_loss/setup.py | 1 -
 sklearn/_loss/tests/test_glm_distribution.py | 15 +-
 sklearn/_loss/tests/test_link.py | 10 +-
 sklearn/_loss/tests/test_loss.py | 14 +-
 sklearn/_min_dependencies.py | 3 +-
 sklearn/base.py | 28 +-
 sklearn/calibration.py | 35 +-
 sklearn/cluster/__init__.py | 20 +-
 sklearn/cluster/_affinity_propagation.py | 10 +-
 sklearn/cluster/_agglomerative.py | 2 +-
 sklearn/cluster/_bicluster.py | 11 +-
 sklearn/cluster/_birch.py | 19 +-
 sklearn/cluster/_bisect_k_means.py | 25 +-
 sklearn/cluster/_dbscan.py | 8 +-
 sklearn/cluster/_dbscan_inner.pyx | 2 +-
 sklearn/cluster/_feature_agglomeration.py | 2 +-
 sklearn/cluster/_hierarchical_fast.pyx | 14 +-
 sklearn/cluster/_k_means_common.pyx | 2 +-
 sklearn/cluster/_k_means_elkan.pyx | 22 +-
 sklearn/cluster/_k_means_lloyd.pyx | 24 +-
 sklearn/cluster/_k_means_minibatch.pyx | 2 +-
 sklearn/cluster/_kmeans.py | 53 +-
 sklearn/cluster/_mean_shift.py | 16 +-
 sklearn/cluster/_optics.py | 9 +-
 sklearn/cluster/_spectral.py | 7 +-
 sklearn/cluster/tests/common.py | 1 +
 .../tests/test_affinity_propagation.py | 12 +-
 sklearn/cluster/tests/test_bicluster.py | 28 +-
 sklearn/cluster/tests/test_birch.py | 16 +-
 sklearn/cluster/tests/test_bisect_k_means.py | 2 +-
 sklearn/cluster/tests/test_dbscan.py | 17 +-
 .../tests/test_feature_agglomeration.py | 4 +-
 sklearn/cluster/tests/test_hierarchical.py | 40 +-
 sklearn/cluster/tests/test_k_means.py | 41 +-
 sklearn/cluster/tests/test_mean_shift.py | 19 +-
 sklearn/cluster/tests/test_optics.py | 17 +-
 sklearn/cluster/tests/test_spectral.py | 18 +-
 sklearn/compose/__init__.py | 3 +-
 sklearn/compose/_column_transformer.py | 19 +-
 sklearn/compose/_target.py | 8 +-
 .../compose/tests/test_column_transformer.py | 23 +-
 sklearn/compose/tests/test_target.py | 23 +-
 sklearn/conftest.py | 29 +-
 sklearn/covariance/__init__.py | 15 +-
 sklearn/covariance/_elliptic_envelope.py | 7 +-
 sklearn/covariance/_empirical_covariance.py | 3 +-
 sklearn/covariance/_graph_lasso.py | 15 +-
 sklearn/covariance/_robust_covariance.py | 7 +-
 sklearn/covariance/_shrunk_covariance.py | 4 +-
 sklearn/covariance/tests/test_covariance.py | 17 +-
 .../tests/test_elliptic_envelope.py | 8 +-
 .../covariance/tests/test_graphical_lasso.py | 19 +-
 .../tests/test_robust_covariance.py | 6 +-
 sklearn/cross_decomposition/__init__.py | 2 +-
 sklearn/cross_decomposition/_pls.py | 19 +-
 sklearn/cross_decomposition/tests/test_pls.py | 13 +-
 sklearn/datasets/__init__.py | 89 +-
 sklearn/datasets/_arff_parser.py | 6 +-
 sklearn/datasets/_base.py | 17 +-
 sklearn/datasets/_california_housing.py | 21 +-
 sklearn/datasets/_covtype.py | 21 +-
 sklearn/datasets/_kddcup99.py | 19 +-
 sklearn/datasets/_lfw.py | 13 +-
 sklearn/datasets/_olivetti_faces.py | 10 +-
 sklearn/datasets/_openml.py | 8 +-
 sklearn/datasets/_rcv1.py | 16 +-
 sklearn/datasets/_samples_generator.py | 4 +-
 sklearn/datasets/_species_distributions.py | 8 +-
 sklearn/datasets/_svmlight_format_fast.pyx | 3 +-
 sklearn/datasets/_svmlight_format_io.py | 5 +-
 sklearn/datasets/_twenty_newsgroups.py | 28 +-
 sklearn/datasets/setup.py | 3 +-
 sklearn/datasets/tests/conftest.py | 1 -
 sklearn/datasets/tests/test_20news.py | 13 +-
 sklearn/datasets/tests/test_arff_parser.py | 5 +-
 sklearn/datasets/tests/test_base.py | 37 +-
 .../datasets/tests/test_california_housing.py | 3 +-
 sklearn/datasets/tests/test_common.py | 2 +-
 sklearn/datasets/tests/test_covtype.py | 2 -
 sklearn/datasets/tests/test_kddcup99.py | 9 +-
 sklearn/datasets/tests/test_lfw.py | 11 +-
 sklearn/datasets/tests/test_olivetti_faces.py | 3 +-
 sklearn/datasets/tests/test_openml.py | 18 +-
 sklearn/datasets/tests/test_rcv1.py | 9 +-
 .../datasets/tests/test_samples_generator.py | 50 +-
 .../datasets/tests/test_svmlight_format.py | 20 +-
 sklearn/decomposition/__init__.py | 27 +-
 sklearn/decomposition/_base.py | 3 +-
 sklearn/decomposition/_dict_learning.py | 20 +-
 sklearn/decomposition/_factor_analysis.py | 6 +-
 sklearn/decomposition/_fastica.py | 3 +-
 sklearn/decomposition/_incremental_pca.py | 4 +-
 sklearn/decomposition/_kernel_pca.py | 19 +-
 sklearn/decomposition/_lda.py | 7 +-
 sklearn/decomposition/_nmf.py | 16 +-
 sklearn/decomposition/_pca.py | 9 +-
 sklearn/decomposition/_sparse_pca.py | 4 +-
 sklearn/decomposition/_truncated_svd.py | 1 -
 sklearn/decomposition/setup.py | 1 -
 .../decomposition/tests/test_dict_learning.py | 50 +-
 .../tests/test_factor_analysis.py | 10 +-
 sklearn/decomposition/tests/test_fastica.py | 8 +-
 .../tests/test_incremental_pca.py | 15 +-
 .../decomposition/tests/test_kernel_pca.py | 23 +-
 sklearn/decomposition/tests/test_nmf.py | 24 +-
 .../decomposition/tests/test_online_lda.py | 19 +-
 sklearn/decomposition/tests/test_pca.py | 14 +-
 .../decomposition/tests/test_sparse_pca.py | 13 +-
 .../decomposition/tests/test_truncated_svd.py | 7 +-
 sklearn/discriminant_analysis.py | 21 +-
 sklearn/dummy.py | 20 +-
 sklearn/ensemble/__init__.py | 31 +-
 sklearn/ensemble/_bagging.py | 17 +-
 sklearn/ensemble/_base.py | 14 +-
 sklearn/ensemble/_forest.py | 30 +-
 sklearn/ensemble/_gb.py | 46 +-
 sklearn/ensemble/_gb_losses.py | 6 +-
 sklearn/ensemble/_gradient_boosting.pyx | 17 +-
 .../_hist_gradient_boosting/_binning.pyx | 5 +-
 .../_hist_gradient_boosting/_bitset.pxd | 6 +-
 .../_hist_gradient_boosting/_bitset.pyx | 6 +-
 .../_gradient_boosting.pyx | 4 +-
 .../_hist_gradient_boosting/_predictor.pyx | 14 +-
 .../_hist_gradient_boosting/binning.py | 8 +-
 .../gradient_boosting.py | 27 +-
 .../_hist_gradient_boosting/grower.py | 25 +-
 .../_hist_gradient_boosting/histogram.pyx | 8 +-
 .../_hist_gradient_boosting/predictor.py | 8 +-
 .../_hist_gradient_boosting/splitting.pyx | 20 +-
 .../tests/test_binning.py | 10 +-
 .../tests/test_bitset.py | 4 +-
 .../tests/test_compare_lightgbm.py | 12 +-
 .../tests/test_gradient_boosting.py | 29 +-
 .../tests/test_grower.py | 19 +-
 .../tests/test_histogram.py | 16 +-
 .../tests/test_monotonic_contraints.py | 16 +-
 .../tests/test_predictor.py | 22 +-
 .../tests/test_splitting.py | 14 +-
 .../tests/test_warm_start.py | 12 +-
 .../_hist_gradient_boosting/utils.pyx | 13 +-
 sklearn/ensemble/_iforest.py | 15 +-
 sklearn/ensemble/_stacking.py | 42 +-
 sklearn/ensemble/_voting.py | 23 +-
 sklearn/ensemble/_weight_boosting.py | 27 +-
 sklearn/ensemble/tests/test_bagging.py | 29 +-
 sklearn/ensemble/tests/test_base.py | 7 +-
 sklearn/ensemble/tests/test_common.py | 30 +-
 sklearn/ensemble/tests/test_forest.py | 64 +-
 .../ensemble/tests/test_gradient_boosting.py | 40 +-
 .../test_gradient_boosting_loss_functions.py | 25 +-
 sklearn/ensemble/tests/test_iforest.py | 25 +-
 sklearn/ensemble/tests/test_stacking.py | 67 +-
 sklearn/ensemble/tests/test_voting.py | 39 +-
 .../ensemble/tests/test_weight_boosting.py | 32 +-
 .../experimental/enable_halving_search_cv.py | 5 +-
 .../enable_hist_gradient_boosting.py | 1 +
 .../experimental/enable_iterative_imputer.py | 2 +-
 sklearn/externals/_arff.py | 511 ++-
 sklearn/externals/_lobpcg.py | 68 +-
 sklearn/externals/_numpy_compiler_patch.py | 4 +-
 sklearn/feature_extraction/__init__.py | 4 +-
 .../feature_extraction/_dict_vectorizer.py | 4 +-
 sklearn/feature_extraction/_hashing_fast.pyx | 8 +-
 sklearn/feature_extraction/image.py | 7 +-
 .../tests/test_dict_vectorizer.py | 7 +-
 .../tests/test_feature_hasher.py | 2 +-
 .../feature_extraction/tests/test_image.py | 10 +-
 sklearn/feature_extraction/tests/test_text.py | 48 +-
 sklearn/feature_extraction/text.py | 17 +-
 sklearn/feature_selection/__init__.py | 40 +-
 sklearn/feature_selection/_base.py | 8 +-
 sklearn/feature_selection/_from_model.py | 12 +-
 sklearn/feature_selection/_mutual_info.py | 4 +-
 sklearn/feature_selection/_rfe.py | 26 +-
 sklearn/feature_selection/_sequential.py | 7 +-
 .../_univariate_selection.py | 6 +-
 .../feature_selection/_variance_threshold.py | 3 +-
 sklearn/feature_selection/tests/test_base.py | 3 +-
 sklearn/feature_selection/tests/test_chi2.py | 5 +-
 .../tests/test_feature_select.py | 33 +-
 .../tests/test_from_model.py | 32 +-
 .../tests/test_mutual_info.py | 9 +-
 sklearn/feature_selection/tests/test_rfe.py | 25 +-
 .../tests/test_sequential.py | 12 +-
 .../tests/test_variance_threshold.py | 4 +-
 sklearn/gaussian_process/__init__.py | 5 +-
 sklearn/gaussian_process/_gpc.py | 12 +-
 sklearn/gaussian_process/_gpr.py | 8 +-
 sklearn/gaussian_process/kernels.py | 13 +-
 .../tests/_mini_sequence_kernel.py | 10 +-
 sklearn/gaussian_process/tests/test_gpc.py | 17 +-
 sklearn/gaussian_process/tests/test_gpr.py | 20 +-
 .../gaussian_process/tests/test_kernels.py | 38 +-
 sklearn/impute/_base.py | 9 +-
 sklearn/impute/_iterative.py | 17 +-
 sklearn/impute/_knn.py | 9 +-
 sklearn/impute/tests/test_base.py | 3 +-
 sklearn/impute/tests/test_common.py | 18 +-
 sklearn/impute/tests/test_impute.py | 34 +-
 sklearn/impute/tests/test_knn.py | 3 +-
 sklearn/inspection/__init__.py | 7 +-
 sklearn/inspection/_partial_dependence.py | 23 +-
 sklearn/inspection/_permutation_importance.py | 6 +-
 sklearn/inspection/_plot/decision_boundary.py | 7 +-
 .../inspection/_plot/partial_dependence.py | 9 +-
 .../tests/test_boundary_decision_display.py | 20 +-
 .../tests/test_plot_partial_dependence.py | 24 +-
 .../tests/test_partial_dependence.py | 47 +-
 .../tests/test_permutation_importance.py | 30 +-
 sklearn/isotonic.py | 10 +-
 sklearn/kernel_approximation.py | 14 +-
 sklearn/kernel_ridge.py | 6 +-
 sklearn/linear_model/__init__.py | 46 +-
 sklearn/linear_model/_base.py | 28 +-
 sklearn/linear_model/_bayes.py | 7 +-
 sklearn/linear_model/_cd_fast.pyx | 24 +-
 sklearn/linear_model/_coordinate_descent.py | 15 +-
 sklearn/linear_model/_glm/__init__.py | 4 +-
 sklearn/linear_model/_glm/glm.py | 6 +-
 sklearn/linear_model/_glm/tests/test_glm.py | 18 +-
 sklearn/linear_model/_huber.py | 5 +-
 sklearn/linear_model/_least_angle.py | 17 +-
 sklearn/linear_model/_linear_loss.py | 1 -
 sklearn/linear_model/_logistic.py | 34 +-
 sklearn/linear_model/_omp.py | 7 +-
 sklearn/linear_model/_passive_aggressive.py | 4 +-
 sklearn/linear_model/_quantile.py | 4 +-
 sklearn/linear_model/_ransac.py | 19 +-
 sklearn/linear_model/_ridge.py | 43 +-
 sklearn/linear_model/_sag.py | 6 +-
 sklearn/linear_model/_sgd_fast.pyx | 9 +-
 sklearn/linear_model/_stochastic_gradient.py | 44 +-
 sklearn/linear_model/_theil_sen.py | 11 +-
 sklearn/linear_model/setup.py | 1 -
 sklearn/linear_model/tests/test_base.py | 33 +-
 sklearn/linear_model/tests/test_bayes.py | 14 +-
 sklearn/linear_model/tests/test_common.py | 23 +-
 .../tests/test_coordinate_descent.py | 57 +-
 sklearn/linear_model/tests/test_huber.py | 11 +-
 .../linear_model/tests/test_least_angle.py | 29 +-
 .../linear_model/tests/test_linear_loss.py | 9 +-
 sklearn/linear_model/tests/test_logistic.py | 41 +-
 sklearn/linear_model/tests/test_omp.py | 23 +-
 .../tests/test_passive_aggressive.py | 19 +-
 sklearn/linear_model/tests/test_perceptron.py | 7 +-
 sklearn/linear_model/tests/test_quantile.py | 5 +-
 sklearn/linear_model/tests/test_ransac.py | 19 +-
 sklearn/linear_model/tests/test_ridge.py | 89 +-
 sklearn/linear_model/tests/test_sag.py | 25 +-
 sklearn/linear_model/tests/test_sgd.py | 37 +-
 .../tests/test_sparse_coordinate_descent.py | 15 +-
 sklearn/linear_model/tests/test_theil_sen.py | 16 +-
 sklearn/manifold/__init__.py | 2 +-
 sklearn/manifold/_barnes_hut_tsne.pyx | 7 +-
 sklearn/manifold/_isomap.py | 9 +-
 sklearn/manifold/_locally_linear.py | 13 +-
 sklearn/manifold/_mds.py | 9 +-
 sklearn/manifold/_spectral_embedding.py | 12 +-
 sklearn/manifold/_t_sne.py | 17 +-
 sklearn/manifold/_utils.pyx | 4 +-
 sklearn/manifold/tests/test_isomap.py | 14 +-
 sklearn/manifold/tests/test_locally_linear.py | 12 +-
 sklearn/manifold/tests/test_mds.py | 2 +-
 .../manifold/tests/test_spectral_embedding.py | 24 +-
 sklearn/manifold/tests/test_t_sne.py | 56 +-
 sklearn/metrics/__init__.py | 180 +-
 sklearn/metrics/_classification.py | 26 +-
 sklearn/metrics/_dist_metrics.pxd | 3 +-
 sklearn/metrics/_dist_metrics.pyx | 14 +-
 .../metrics/_pairwise_distances_reduction.pyx | 48 +-
 sklearn/metrics/_plot/confusion_matrix.py | 7 +-
 sklearn/metrics/_plot/det_curve.py | 9 +-
 .../metrics/_plot/precision_recall_curve.py | 8 +-
 sklearn/metrics/_plot/roc_curve.py | 9 +-
 sklearn/metrics/_plot/tests/test_base.py | 3 +-
 .../_plot/tests/test_common_curve_display.py | 7 +-
 .../tests/test_confusion_matrix_display.py | 12 +-
 .../_plot/tests/test_det_curve_display.py | 6 +-
 .../_plot/tests/test_plot_confusion_matrix.py | 15 +-
 .../_plot/tests/test_plot_curve_common.py | 7 +-
 .../_plot/tests/test_plot_det_curve.py | 6 +-
 .../_plot/tests/test_plot_precision_recall.py | 19 +-
 .../_plot/tests/test_plot_roc_curve.py | 13 +-
 .../tests/test_precision_recall_display.py | 9 +-
 .../_plot/tests/test_roc_curve_display.py | 14 +-
 sklearn/metrics/_ranking.py | 18 +-
 sklearn/metrics/_regression.py | 7 +-
 sklearn/metrics/_scorer.py | 64 +-
 sklearn/metrics/cluster/__init__.py | 40 +-
 sklearn/metrics/cluster/_bicluster.py | 2 +-
 .../cluster/_expected_mutual_info_fast.pyx | 6 +-
 sklearn/metrics/cluster/_supervised.py | 2 +-
 sklearn/metrics/cluster/_unsupervised.py | 7 +-
 .../metrics/cluster/tests/test_bicluster.py | 5 +-
 sklearn/metrics/cluster/tests/test_common.py | 30 +-
 .../metrics/cluster/tests/test_supervised.py | 37 +-
 .../cluster/tests/test_unsupervised.py | 14 +-
 sklearn/metrics/pairwise.py | 28 +-
 sklearn/metrics/setup.py | 2 +-
 sklearn/metrics/tests/test_classification.py | 83 +-
 sklearn/metrics/tests/test_common.py | 114 +-
 sklearn/metrics/tests/test_dist_metrics.py | 9 +-
 sklearn/metrics/tests/test_pairwise.py | 89 +-
 .../test_pairwise_distances_reduction.py | 9 +-
 sklearn/metrics/tests/test_ranking.py | 65 +-
 sklearn/metrics/tests/test_regression.py | 52 +-
 sklearn/metrics/tests/test_score_objects.py | 65 +-
 sklearn/mixture/__init__.py | 3 +-
 sklearn/mixture/_base.py | 3 +-
 sklearn/mixture/_bayesian_mixture.py | 17 +-
 sklearn/mixture/_gaussian_mixture.py | 4 +-
 .../mixture/tests/test_bayesian_mixture.py | 20 +-
 .../mixture/tests/test_gaussian_mixture.py | 31 +-
 sklearn/mixture/tests/test_mixture.py | 5 +-
 sklearn/model_selection/__init__.py | 61 +-
 sklearn/model_selection/_search.py | 40 +-
 .../_search_successive_halving.py | 10 +-
 sklearn/model_selection/_split.py | 18 +-
 sklearn/model_selection/_validation.py | 23 +-
 sklearn/model_selection/tests/test_search.py | 112 +-
 sklearn/model_selection/tests/test_split.py | 81 +-
 .../tests/test_successive_halving.py | 25 +-
 .../model_selection/tests/test_validation.py | 135 +-
 sklearn/multiclass.py | 32 +-
 sklearn/multioutput.py | 20 +-
 sklearn/naive_bayes.py | 10 +-
 sklearn/neighbors/__init__.py | 21 +-
 sklearn/neighbors/_base.py | 29 +-
 sklearn/neighbors/_binary_tree.pxi | 17 +-
 sklearn/neighbors/_classification.py | 16 +-
 sklearn/neighbors/_graph.py | 5 +-
 sklearn/neighbors/_kde.py | 8 +-
 sklearn/neighbors/_lof.py | 9 +-
 sklearn/neighbors/_nca.py | 18 +-
 sklearn/neighbors/_nearest_centroid.py | 5 +-
 sklearn/neighbors/_partition_nodes.pxd | 1 -
 sklearn/neighbors/_quad_tree.pyx | 12 +-
 sklearn/neighbors/_regression.py | 9 +-
 sklearn/neighbors/_unsupervised.py | 4 +-
 sklearn/neighbors/tests/test_ball_tree.py | 3 +-
 sklearn/neighbors/tests/test_kd_tree.py | 4 +-
 sklearn/neighbors/tests/test_kde.py | 13 +-
 sklearn/neighbors/tests/test_lof.py | 16 +-
 sklearn/neighbors/tests/test_nca.py | 13 +-
 .../neighbors/tests/test_nearest_centroid.py | 4 +-
 sklearn/neighbors/tests/test_neighbors.py | 48 +-
 .../tests/test_neighbors_pipeline.py | 25 +-
 .../neighbors/tests/test_neighbors_tree.py | 29 +-
 sklearn/neighbors/tests/test_quad_tree.py | 2 +-
 sklearn/neural_network/__init__.py | 4 +-
 sklearn/neural_network/_base.py | 1 +
 .../neural_network/_multilayer_perceptron.py | 41 +-
 sklearn/neural_network/_rbm.py | 10 +-
 sklearn/neural_network/tests/test_base.py | 5 +-
 sklearn/neural_network/tests/test_mlp.py | 30 +-
 sklearn/neural_network/tests/test_rbm.py | 14 +-
 .../tests/test_stochastic_optimizers.py | 3 +-
 sklearn/pipeline.py | 19 +-
 sklearn/preprocessing/__init__.py | 57 +-
 .../_csr_polynomial_expansion.pyx | 3 +-
 sklearn/preprocessing/_data.py | 22 +-
 sklearn/preprocessing/_discretization.py | 16 +-
 sklearn/preprocessing/_encoders.py | 9 +-
 sklearn/preprocessing/_label.py | 13 +-
 sklearn/preprocessing/_polynomial.py | 10 +-
 sklearn/preprocessing/tests/test_common.py | 38 +-
 sklearn/preprocessing/tests/test_data.py | 83 +-
 .../tests/test_discretization.py | 10 +-
 sklearn/preprocessing/tests/test_encoders.py | 14 +-
 .../tests/test_function_transformer.py | 8 +-
 sklearn/preprocessing/tests/test_label.py | 41 +-
 .../preprocessing/tests/test_polynomial.py | 6 +-
 sklearn/random_projection.py | 8 +-
 sklearn/semi_supervised/_label_propagation.py | 5 +-
 sklearn/semi_supervised/_self_training.py | 6 +-
 .../tests/test_label_propagation.py | 14 +-
 .../tests/test_self_training.py | 11 +-
 sklearn/setup.py | 4 +-
 sklearn/svm/__init__.py | 2 +-
 sklearn/svm/_base.py | 34 +-
 sklearn/svm/_bounds.py | 2 +-
 sklearn/svm/_classes.py | 11 +-
 sklearn/svm/_liblinear.pyx | 7 +-
 sklearn/svm/_libsvm.pyx | 5 +-
 sklearn/svm/_libsvm_sparse.pyx | 8 +-
 sklearn/svm/setup.py | 1 -
 sklearn/svm/tests/test_bounds.py | 10 +-
 sklearn/svm/tests/test_sparse.py | 12 +-
 sklearn/svm/tests/test_svm.py | 37 +-
 sklearn/tests/random_seed.py | 3 +-
 sklearn/tests/test_base.py | 35 +-
 sklearn/tests/test_build.py | 3 +-
 sklearn/tests/test_calibration.py | 59 +-
 sklearn/tests/test_common.py | 48 +-
 sklearn/tests/test_config.py | 4 +-
 sklearn/tests/test_discriminant_analysis.py | 31 +-
 sklearn/tests/test_docstring_parameters.py | 30 +-
 sklearn/tests/test_docstrings.py | 11 +-
 sklearn/tests/test_dummy.py | 16 +-
 sklearn/tests/test_isotonic.py | 20 +-
 sklearn/tests/test_kernel_approximation.py | 26 +-
 sklearn/tests/test_kernel_ridge.py | 7 +-
 sklearn/tests/test_metaestimators.py | 27 +-
 sklearn/tests/test_min_dependencies_readme.py | 3 +-
 sklearn/tests/test_multiclass.py | 56 +-
 sklearn/tests/test_multioutput.py | 56 +-
 sklearn/tests/test_naive_bayes.py | 28 +-
 sklearn/tests/test_pipeline.py | 49 +-
 sklearn/tests/test_random_projection.py | 32 +-
 sklearn/tree/__init__.py | 14 +-
 sklearn/tree/_classes.py | 42 +-
 sklearn/tree/_criterion.pxd | 12 +-
 sklearn/tree/_criterion.pyx | 9 +-
 sklearn/tree/_export.py | 9 +-
 sklearn/tree/_splitter.pxd | 11 +-
 sklearn/tree/_splitter.pyx | 17 +-
 sklearn/tree/_tree.pxd | 5 +-
 sklearn/tree/_tree.pyx | 21 +-
 sklearn/tree/_utils.pxd | 4 +-
 sklearn/tree/_utils.pyx | 6 +-
 sklearn/tree/tests/test_export.py | 13 +-
 sklearn/tree/tests/test_reingold_tilford.py | 3 +-
 sklearn/tree/tests/test_tree.py | 80 +-
 sklearn/utils/__init__.py | 41 +-
 sklearn/utils/_cython_blas.pyx | 37 +-
 sklearn/utils/_encode.py | 3 +-
 sklearn/utils/_estimator_html_repr.py | 5 +-
 sklearn/utils/_fast_dict.pxd | 3 +-
 sklearn/utils/_fast_dict.pyx | 11 +-
 sklearn/utils/_joblib.py | 21 +-
 sklearn/utils/_logistic_sigmoid.pyx | 3 +-
 sklearn/utils/_mask.py | 3 +-
 sklearn/utils/_mocking.py | 3 +-
 sklearn/utils/_pprint.py | 2 +-
 sklearn/utils/_random.pxd | 2 -
 sklearn/utils/_random.pyx | 3 -
 sklearn/utils/_readonly_array_wrapper.pyx | 4 +-
 sklearn/utils/_show_versions.py | 9 +-
 sklearn/utils/_sorting.pxd | 3 +-
 sklearn/utils/_sorting.pyx | 1 -
 sklearn/utils/_testing.py | 51 +-
 sklearn/utils/_vector_sentinel.pxd | 4 +-
 sklearn/utils/_vector_sentinel.pyx | 6 +-
 sklearn/utils/arrayfuncs.pyx | 10 +-
 sklearn/utils/class_weight.py | 1 +
 sklearn/utils/deprecation.py | 3 +-
 sklearn/utils/estimator_checks.py | 91 +-
 sklearn/utils/fixes.py | 14 +-
 sklearn/utils/graph.py | 2 +-
 sklearn/utils/metaestimators.py | 13 +-
 sklearn/utils/multiclass.py | 9 +-
 sklearn/utils/murmurhash.pyx | 2 -
 sklearn/utils/optimize.py | 5 +-
 sklearn/utils/random.py | 3 +-
 sklearn/utils/sparsefuncs.py | 10 +-
 sklearn/utils/sparsefuncs_fast.pyx | 4 +-
 sklearn/utils/tests/test_arrayfuncs.py | 2 +-
 sklearn/utils/tests/test_class_weight.py | 7 +-
 sklearn/utils/tests/test_cython_blas.py | 33 +-
 sklearn/utils/tests/test_cython_templating.py | 2 -
 sklearn/utils/tests/test_deprecation.py | 4 +-
 sklearn/utils/tests/test_encode.py | 5 +-
 sklearn/utils/tests/test_estimator_checks.py | 50 +-
 .../utils/tests/test_estimator_html_repr.py | 40 +-
 sklearn/utils/tests/test_extmath.py | 51 +-
 sklearn/utils/tests/test_fixes.py | 4 +-
 sklearn/utils/tests/test_graph.py | 4 +-
 sklearn/utils/tests/test_metaestimators.py | 9 +-
 sklearn/utils/tests/test_mocking.py | 10 +-
 sklearn/utils/tests/test_multiclass.py | 51 +-
 sklearn/utils/tests/test_murmurhash.py | 4 +-
 sklearn/utils/tests/test_optimize.py | 3 +-
 sklearn/utils/tests/test_parallel.py | 3 +-
 sklearn/utils/tests/test_pprint.py | 9 +-
 sklearn/utils/tests/test_random.py | 4 +-
 sklearn/utils/tests/test_readonly_wrapper.py | 1 +
 sklearn/utils/tests/test_seq_dataset.py | 4 +-
 sklearn/utils/tests/test_shortest_path.py | 1 -
 sklearn/utils/tests/test_show_versions.py | 6 +-
 sklearn/utils/tests/test_sparsefuncs.py | 19 +-
 sklearn/utils/tests/test_tags.py | 5 +-
 sklearn/utils/tests/test_testing.py | 41 +-
 sklearn/utils/tests/test_utils.py | 51 +-
 sklearn/utils/tests/test_validation.py | 87 +-
 sklearn/utils/tests/test_weight_vector.py | 6 +-
 sklearn/utils/validation.py | 18 +-
 828 files changed, 7304 insertions(+), 8016 deletions(-)

diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py
index a022c3c4dd2a7..ddf9bda3492de 100644
--- a/.github/scripts/label_title_regex.py
+++ b/.github/scripts/label_title_regex.py
@@ -1,10 +1,11 @@
 """Labels PRs based on title. Must be run in a github action with the pull_request_target event."""
-from github import Github
+import json
 import os
-import json
 import re
 
+from github import Github
+
 context_dict = json.loads(os.getenv("CONTEXT_GITHUB"))
 repo = context_dict["repository"]
diff --git a/asv_benchmarks/benchmarks/cluster.py b/asv_benchmarks/benchmarks/cluster.py
index ebf64d07e557b..5973947a7d295 100644
--- a/asv_benchmarks/benchmarks/cluster.py
+++ b/asv_benchmarks/benchmarks/cluster.py
@@ -1,7 +1,7 @@
 from sklearn.cluster import KMeans, MiniBatchKMeans
 
 from .common import Benchmark, Estimator, Predictor, Transformer
-from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset
+from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset
 from .utils import neg_mean_inertia
diff --git a/asv_benchmarks/benchmarks/common.py b/asv_benchmarks/benchmarks/common.py
index aeea558844587..c3e114a212047 100644
--- a/asv_benchmarks/benchmarks/common.py
+++ b/asv_benchmarks/benchmarks/common.py
@@ -1,11 +1,11 @@
+import itertools
+import json
 import os
-import json
+import pickle
 import timeit
-import pickle
-import itertools
 from abc import ABC, abstractmethod
+from multiprocessing import cpu_count
 from pathlib import Path
-from multiprocessing import cpu_count
 
 import numpy as np
diff --git a/asv_benchmarks/benchmarks/datasets.py b/asv_benchmarks/benchmarks/datasets.py
index b8d2fdc9bad21..dbe0eac0b822c 100644
--- a/asv_benchmarks/benchmarks/datasets.py
+++ b/asv_benchmarks/benchmarks/datasets.py
@@ -1,21 +1,22 @@
+from pathlib import Path
+
 import numpy as np
 import scipy.sparse as sp
+
 from joblib import Memory
-from pathlib import Path
-
-from sklearn.decomposition import TruncatedSVD
 from sklearn.datasets import (
-    make_blobs,
     fetch_20newsgroups,
+    fetch_olivetti_faces,
     fetch_openml,
     load_digits,
+    make_blobs,
+    make_classification,
     make_regression,
-    make_classification,
-    fetch_olivetti_faces,
 )
-from sklearn.preprocessing import MaxAbsScaler, StandardScaler
+from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MaxAbsScaler, StandardScaler
 
 # memory location for caching datasets
 M = Memory(location=str(Path(__file__).resolve().parent / "cache"))
diff --git a/asv_benchmarks/benchmarks/decomposition.py b/asv_benchmarks/benchmarks/decomposition.py
index d986222758970..b5e71cdd0b556 100644
--- a/asv_benchmarks/benchmarks/decomposition.py
+++ b/asv_benchmarks/benchmarks/decomposition.py
@@ -1,8 +1,8 @@
 from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning
 
 from .common import Benchmark, Estimator, Transformer
-from .datasets import _olivetti_faces_dataset, _mnist_dataset
-from .utils import make_pca_scorers, make_dict_learning_scorers
+from .datasets import _mnist_dataset, _olivetti_faces_dataset
+from .utils import make_dict_learning_scorers, make_pca_scorers
 
 class PCABenchmark(Transformer, Estimator, Benchmark):
diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
index c336d1e5f8805..8c5a28e3da90f 100644
--- a/asv_benchmarks/benchmarks/ensemble.py
+++ b/asv_benchmarks/benchmarks/ensemble.py
@@ -1,7 +1,7 @@
 from sklearn.ensemble import (
-    RandomForestClassifier,
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
+    RandomForestClassifier,
 )
 
 from .common import Benchmark, Estimator, Predictor
diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py
index 8ed9b02ec4dfe..663ceca61d063 100644
--- a/asv_benchmarks/benchmarks/linear_model.py
+++ b/asv_benchmarks/benchmarks/linear_model.py
@@ -1,9 +1,9 @@
 from sklearn.linear_model import (
-    LogisticRegression,
-    Ridge,
     ElasticNet,
     Lasso,
     LinearRegression,
+    LogisticRegression,
+    Ridge,
     SGDRegressor,
 )
diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py
index d63a476a8e438..cf38bc73a38ec 100644
--- a/benchmarks/bench_20newsgroups.py
+++ b/benchmarks/bench_20newsgroups.py
@@ -1,19 +1,20 @@
+import argparse
 from time import time
-import argparse
+
 import numpy as np
 
+from sklearn.datasets import fetch_20newsgroups_vectorized
 from sklearn.dummy import DummyClassifier
-
-from sklearn.datasets import fetch_20newsgroups_vectorized
+from sklearn.ensemble import (
+    AdaBoostClassifier,
+    ExtraTreesClassifier,
+    RandomForestClassifier,
+)
+from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import accuracy_score
+from sklearn.naive_bayes import MultinomialNB
 from sklearn.utils.validation import check_array
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import AdaBoostClassifier
-from sklearn.linear_model import LogisticRegression
-from sklearn.naive_bayes import MultinomialNB
-
 
 ESTIMATORS = {
diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py
index ca8b58780d946..8a13a2d9806c6 100644
--- a/benchmarks/bench_covertype.py
+++ b/benchmarks/bench_covertype.py
@@ -45,20 +45,24 @@
 # Arnaud Joly
 # License: BSD 3 clause
 
+import argparse
 import os
 from time import time
-import argparse
+
 import numpy as np
+
 from joblib import Memory
-
 from sklearn.datasets import fetch_covtype, get_data_home
+from sklearn.ensemble import (
+    ExtraTreesClassifier,
+    GradientBoostingClassifier,
+    RandomForestClassifier,
+)
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.metrics import zero_one_loss
+from sklearn.naive_bayes import GaussianNB
 from sklearn.svm import LinearSVC
-from sklearn.linear_model import SGDClassifier, LogisticRegression
-from sklearn.naive_bayes import GaussianNB
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-from sklearn.metrics import zero_one_loss
 from sklearn.utils import check_array
 
 # Memoize the data extraction and memory map the resulting
diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py
index 2b48a43a1b8d0..98fa17b99f47a 100644
--- a/benchmarks/bench_feature_expansions.py
+++ b/benchmarks/bench_feature_expansions.py
@@ -1,8 +1,10 @@
+from time import time
+
 import matplotlib.pyplot as plt
 import numpy as np
 import scipy.sparse as sparse
+
 from sklearn.preprocessing import PolynomialFeatures
-from time import time
 
 degree = 2
 trials = 3
diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py
index 98d1aedc7fd8a..06ca4d1276e1c 100644
--- a/benchmarks/bench_glm.py
+++ b/benchmarks/bench_glm.py
@@ -5,10 +5,11 @@
 """
 from datetime import datetime
+
 import numpy as np
+
 from sklearn import linear_model
-
 
 if __name__ == "__main__":
     import matplotlib.pyplot as plt
diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py
index 7b111f95044e2..8a0a0545bb627 100644
--- a/benchmarks/bench_glmnet.py
+++ b/benchmarks/bench_glmnet.py
@@ -16,9 +16,11 @@
 In both cases, only 10% of the features are informative.
 """
-import numpy as np
 import gc
 from time import time
+
+import numpy as np
+
 from sklearn.datasets import make_regression
 
 alpha = 0.1
@@ -45,12 +47,12 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
 if __name__ == "__main__":
+    # Delayed import of matplotlib.pyplot
+    import matplotlib.pyplot as plt
     from glmnet.elastic_net import Lasso as GlmnetLasso
+
     from sklearn.linear_model import Lasso as ScikitLasso
-    # Delayed import of matplotlib.pyplot
-    import matplotlib.pyplot as plt
-
     scikit_results = []
     glmnet_results = []
diff --git a/benchmarks/bench_hist_gradient_boosting.py b/benchmarks/bench_hist_gradient_boosting.py
index c1dfffabe71c2..163e21f98ed0d 100644
--- a/benchmarks/bench_hist_gradient_boosting.py
+++ b/benchmarks/bench_hist_gradient_boosting.py
@@ -1,15 +1,16 @@
+import argparse
 from time import time
-import argparse
 
 import matplotlib.pyplot as plt
 import numpy as np
+
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+    HistGradientBoostingClassifier,
+    HistGradientBoostingRegressor,
+)
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
 from sklearn.model_selection import train_test_split
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_regression
-from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
 
 parser = argparse.ArgumentParser()
diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py
index 5fa5bbae0c35c..1b5905b1cf4e8 100644
--- a/benchmarks/bench_hist_gradient_boosting_adult.py
+++ b/benchmarks/bench_hist_gradient_boosting_adult.py
@@ -4,15 +4,14 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.model_selection import train_test_split
-from sklearn.compose import make_column_transformer, make_column_selector
+from sklearn.compose import make_column_selector, make_column_transformer
 from sklearn.datasets import fetch_openml
-from sklearn.metrics import accuracy_score, roc_auc_score
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import OrdinalEncoder
-
 
 parser = argparse.ArgumentParser()
diff --git a/benchmarks/bench_hist_gradient_boosting_categorical_only.py b/benchmarks/bench_hist_gradient_boosting_categorical_only.py
index 1085bbc49f4f8..e8d215170f9c8 100644
--- a/benchmarks/bench_hist_gradient_boosting_categorical_only.py
+++ b/benchmarks/bench_hist_gradient_boosting_categorical_only.py
@@ -1,11 +1,10 @@
 import argparse
 from time import time
 
-from sklearn.preprocessing import KBinsDiscretizer
 from sklearn.datasets import make_classification
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.preprocessing import KBinsDiscretizer
 
 parser = argparse.ArgumentParser()
diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py
index 197fc6ae43844..abe8018adfd83 100644
--- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py
+++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py
@@ -1,17 +1,17 @@
-from urllib.request import urlretrieve
+import argparse
 import os
 from gzip import GzipFile
 from time import time
-import argparse
+from urllib.request import urlretrieve
 
 import numpy as np
 import pandas as pd
+
 from joblib import Memory
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score, roc_auc_score
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
 
 parser = argparse.ArgumentParser()
diff --git a/benchmarks/bench_hist_gradient_boosting_threading.py b/benchmarks/bench_hist_gradient_boosting_threading.py
index 9acf65bdbaf6a..70787fd2eb479 100644
--- a/benchmarks/bench_hist_gradient_boosting_threading.py
+++ b/benchmarks/bench_hist_gradient_boosting_threading.py
@@ -1,18 +1,19 @@
-from time import time
 import argparse
 import os
 from pprint import pprint
+from time import time
 
 import numpy as np
 from threadpoolctl import threadpool_limits
+
 import sklearn
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+    HistGradientBoostingClassifier,
+    HistGradientBoostingRegressor,
+)
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
 from sklearn.model_selection import train_test_split
-from sklearn.ensemble import HistGradientBoostingRegressor
-from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.datasets import make_classification
-from sklearn.datasets import make_regression
-from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-
 
 parser = argparse.ArgumentParser()
@@ -290,8 +291,8 @@ def one_run(n_threads, n_samples):
 
 if args.plot or args.plot_filename:
+    import matplotlib
     import matplotlib.pyplot as plt
-    import matplotlib
 
     fig, axs = plt.subplots(2, figsize=(12, 12))
diff --git a/benchmarks/bench_isolation_forest.py b/benchmarks/bench_isolation_forest.py
index 5caf31e4ec4de..968ecf20876ae 100644
--- a/benchmarks/bench_isolation_forest.py
+++ b/benchmarks/bench_isolation_forest.py
@@ -17,12 +17,13 @@
 """
 from time import time
+
+import matplotlib.pyplot as plt
 import numpy as np
-import matplotlib.pyplot as plt
 
+from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml
 from sklearn.ensemble import IsolationForest
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
+from sklearn.metrics import auc, roc_curve
 from sklearn.preprocessing import LabelBinarizer
 from sklearn.utils import shuffle as sh
diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py
index 221e6fb12da75..458a04a463303 100644
--- a/benchmarks/bench_isotonic.py
+++ b/benchmarks/bench_isotonic.py
@@ -10,13 +10,15 @@
 This allows the scaling of the algorithm with the problem size to be
 visualized and understood.
 """
-import numpy as np
+import argparse
 import gc
 from datetime import datetime
+
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.special import expit
+
 from sklearn.isotonic import isotonic_regression
-from scipy.special import expit
-import matplotlib.pyplot as plt
-import argparse
 
 def generate_perturbed_logarithm_dataset(size):
diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
index bfd6caf374b4c..e4eddf9cb745a 100644
--- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
+++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
@@ -39,13 +39,12 @@
 import time
 
+import matplotlib.pyplot as plt
 import numpy as np
-import matplotlib.pyplot as plt
+from numpy.testing import assert_array_almost_equal
 
-from numpy.testing import assert_array_almost_equal
+from sklearn.datasets import make_circles
 from sklearn.decomposition import KernelPCA
-from sklearn.datasets import make_circles
-
 
 print(__doc__)
diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
index e23261f4e871b..b6d82647012d5 100644
--- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
+++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
@@ -41,13 +41,12 @@
 import time
 
+import matplotlib.pyplot as plt
 import numpy as np
-import matplotlib.pyplot as plt
+from numpy.testing import assert_array_almost_equal
 
-from numpy.testing import assert_array_almost_equal
+from sklearn.datasets import make_circles
 from sklearn.decomposition import KernelPCA
-from sklearn.datasets import make_circles
-
 
 print(__doc__)
diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py
index 524ac7dfbad63..50d1b5466a345 100644
--- a/benchmarks/bench_lasso.py
+++ b/benchmarks/bench_lasso.py
@@ -13,6 +13,7 @@
 """
 import gc
 from time import time
+
 import numpy as np
 
 from sklearn.datasets import make_regression
@@ -61,8 +62,9 @@ def compute_bench(alpha, n_samples, n_features, precompute):
 if __name__ == "__main__":
+    import matplotlib.pyplot as plt
+
     from sklearn.linear_model import Lasso, LassoLars
-    import matplotlib.pyplot as plt
 
     alpha = 0.01  # regularization parameter
diff --git a/benchmarks/bench_lof.py b/benchmarks/bench_lof.py
index 8652073a7203d..31057e2e4067b 100644
--- a/benchmarks/bench_lof.py
+++ b/benchmarks/bench_lof.py
@@ -18,11 +18,13 @@
 """
 from time import time
+
+import matplotlib.pyplot as plt
 import numpy as np
-import matplotlib.pyplot as plt
+
+from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml
+from sklearn.metrics import auc, roc_curve
 from sklearn.neighbors import LocalOutlierFactor
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml
 from sklearn.preprocessing import LabelBinarizer
 
 print(__doc__)
diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py
index 5eca252282d11..c50bfc2e594d6 100644
--- a/benchmarks/bench_mnist.py
+++ b/benchmarks/bench_mnist.py
@@ -30,26 +30,24 @@
 # Arnaud Joly
 # License: BSD 3 clause
 
+import argparse
 import os
 from time import time
-import argparse
+
 import numpy as np
+
 from joblib import Memory
-
-from sklearn.datasets import fetch_openml
-from sklearn.datasets import get_data_home
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import fetch_openml, get_data_home
 from sklearn.dummy import DummyClassifier
-from sklearn.kernel_approximation import Nystroem
-from sklearn.kernel_approximation import RBFSampler
+from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
+from sklearn.kernel_approximation import Nystroem, RBFSampler
+from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import zero_one_loss
+from sklearn.neural_network import MLPClassifier
 from sklearn.pipeline import make_pipeline
 from sklearn.svm import LinearSVC
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.utils import check_array
-from sklearn.linear_model import LogisticRegression
-from sklearn.neural_network import MLPClassifier
 
 # Memoize the data extraction and memory map the resulting
 # train / test splits in readonly mode
diff --git a/benchmarks/bench_multilabel_metrics.py b/benchmarks/bench_multilabel_metrics.py
index 1b8449a24da51..2a87b388e91a2 100755
--- a/benchmarks/bench_multilabel_metrics.py
+++ b/benchmarks/bench_multilabel_metrics.py
@@ -3,26 +3,25 @@
 A comparison of multilabel target formats and metrics over them
 """
-from timeit import timeit
-from functools import partial
+import argparse
 import itertools
-import argparse
 import sys
+from functools import partial
+from timeit import timeit
 
 import matplotlib.pyplot as plt
+import numpy as np
 import scipy.sparse as sp
-import numpy as np
 
 from sklearn.datasets import make_multilabel_classification
 from sklearn.metrics import (
+    accuracy_score,
     f1_score,
-    accuracy_score,
     hamming_loss,
     jaccard_similarity_score,
 )
 from sklearn.utils._testing import ignore_warnings
-
 
 METRICS = {
diff --git a/benchmarks/bench_online_ocsvm.py b/benchmarks/bench_online_ocsvm.py
index 95a68336c66aa..c7eaefe082948 100644
--- a/benchmarks/bench_online_ocsvm.py
+++ b/benchmarks/bench_online_ocsvm.py
@@ -15,21 +15,20 @@
 """
 from time import time
+
+import matplotlib
+import matplotlib.pyplot as plt
 import numpy as np
-
 from scipy.interpolate import interp1d
-from sklearn.metrics import roc_curve, auc
-from sklearn.datasets import fetch_kddcup99, fetch_covtype
+from sklearn.datasets import fetch_covtype, fetch_kddcup99
+from sklearn.kernel_approximation import Nystroem
+from sklearn.linear_model import SGDOneClassSVM
+from sklearn.metrics import auc, roc_curve
+from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import LabelBinarizer, StandardScaler
-from sklearn.pipeline import make_pipeline
+from sklearn.svm import OneClassSVM
 from sklearn.utils import shuffle
-from sklearn.kernel_approximation import Nystroem
-from sklearn.svm import OneClassSVM
-from sklearn.linear_model import SGDOneClassSVM
-
-import matplotlib.pyplot as plt
-import matplotlib
 
 font = {"weight": "normal", "size": 15}
diff --git a/benchmarks/bench_plot_fastkmeans.py b/benchmarks/bench_plot_fastkmeans.py
index 0d8fe261cb338..edbf9412deca2 100644
--- a/benchmarks/bench_plot_fastkmeans.py
+++ b/benchmarks/bench_plot_fastkmeans.py
@@ -98,8 +98,8 @@ def compute_bench_2(chunks):
 
 if __name__ == "__main__":
+    import matplotlib.pyplot as plt
     from mpl_toolkits.mplot3d import axes3d  # noqa register the 3d projection
-    import matplotlib.pyplot as plt
 
     samples_range = np.linspace(50, 150, 5).astype(int)
     features_range = np.linspace(150, 50000, 5).astype(int)
diff --git a/benchmarks/bench_plot_incremental_pca.py b/benchmarks/bench_plot_incremental_pca.py
index 49b87c8c7060a..0f42e4b630f1d 100644
--- a/benchmarks/bench_plot_incremental_pca.py
+++ b/benchmarks/bench_plot_incremental_pca.py
@@ -7,13 +7,15 @@
 """
-import numpy as np
 import gc
+from collections import defaultdict
 from time import time
-from collections import defaultdict
+
 import matplotlib.pyplot as plt
+import numpy as np
+
 from sklearn.datasets import fetch_lfw_people
-from sklearn.decomposition import IncrementalPCA, PCA
+from sklearn.decomposition import PCA, IncrementalPCA
 
 def plot_results(X, y, label):
diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py
index 37bb5c6ab0071..4373c70223976 100644
--- a/benchmarks/bench_plot_lasso_path.py
+++ b/benchmarks/bench_plot_lasso_path.py
@@ -2,16 +2,15 @@
 The input data is mostly low rank but is a fat infinite tail.
 """
-from collections import defaultdict
 import gc
 import sys
+from collections import defaultdict
 from time import time
 
 import numpy as np
 
-from sklearn.linear_model import lars_path, lars_path_gram
-from sklearn.linear_model import lasso_path
 from sklearn.datasets import make_regression
+from sklearn.linear_model import lars_path, lars_path_gram, lasso_path
 
 def compute_bench(samples_range, features_range):
@@ -81,8 +80,8 @@ def compute_bench(samples_range, features_range):
 
 if __name__ == "__main__":
+    import matplotlib.pyplot as plt
     from mpl_toolkits.mplot3d import axes3d  # noqa register the 3d projection
-    import matplotlib.pyplot as plt
 
     samples_range = np.linspace(10, 2000, 5).astype(int)
     features_range = np.linspace(10, 2000, 5).astype(int)
diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py
index 2d9cf2b08b71d..c6e5541eda6f3 100644
--- a/benchmarks/bench_plot_neighbors.py
+++ b/benchmarks/bench_plot_neighbors.py
@@ -3,11 +3,11 @@
 """
 from time import time
 
+import matplotlib.pyplot as plt
 import numpy as np
-import matplotlib.pyplot as plt
 from matplotlib import ticker
 
-from sklearn import neighbors, datasets
+from sklearn import datasets, neighbors
 
 def get_data(N, D, dataset="dense"):
diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py
index 8ac667f0e433d..745828955f9e8 100644
--- a/benchmarks/bench_plot_nmf.py
+++ b/benchmarks/bench_plot_nmf.py
@@ -6,28 +6,25 @@
 # Anthony Di Franco (projected gradient, Python and NumPy port)
 # License: BSD 3 clause
 
-from time import time
+import numbers
 import sys
 import warnings
-import numbers
+from time import time
 
+import matplotlib.pyplot as plt
 import numpy as np
-import matplotlib.pyplot as plt
-from joblib import Memory
 import pandas
-from sklearn.utils._testing import ignore_warnings
-from sklearn.feature_extraction.text import TfidfVectorizer
+from joblib import Memory
 from sklearn.decomposition import NMF
-from sklearn.decomposition._nmf import _initialize_nmf
-from sklearn.decomposition._nmf import _beta_divergence
-from sklearn.decomposition._nmf import _check_init
+from sklearn.decomposition._nmf import _beta_divergence, _check_init, _initialize_nmf
 from sklearn.exceptions import ConvergenceWarning
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.utils import check_array
+from sklearn.utils._testing import ignore_warnings
 from sklearn.utils.extmath import safe_sparse_dot, squared_norm
-from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted, check_non_negative
-
 
 mem = Memory(cachedir=".", verbose=0)
diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py
index af94d38061ac7..4325e1fb17f3c 100644
--- a/benchmarks/bench_plot_omp_lars.py
+++ b/benchmarks/bench_plot_omp_lars.py
@@ -9,8 +9,8 @@
 import numpy as np
 
+from sklearn.datasets import make_sparse_coded_signal
 from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp
-from sklearn.datasets import make_sparse_coded_signal
 
 def compute_bench(samples_range, features_range):
diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py
index ca12972f9be6c..a41e3fab20589 100644
--- a/benchmarks/bench_plot_parallel_pairwise.py
+++ b/benchmarks/bench_plot_parallel_pairwise.py
@@ -4,9 +4,8 @@
 import matplotlib.pyplot as plt
 
+from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels
 from sklearn.utils import check_random_state
-from sklearn.metrics.pairwise import pairwise_distances
-from sklearn.metrics.pairwise import pairwise_kernels
 
 def plot(func):
diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py
index e48de2881326e..b21589263a49f 100644
--- a/benchmarks/bench_plot_polynomial_kernel_approximation.py
+++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py
@@ -41,21 +41,21 @@
 # Author: Daniel Lopez-Sanchez
 # License: BSD 3 clause
 
-# Load data manipulation functions
-from sklearn.datasets import load_digits
-from sklearn.model_selection import train_test_split
+# Will use this for timing results
+from time import time
 
 # Some common libraries
 import matplotlib.pyplot as plt
 import numpy as np
 
-# Will use this for timing results
-from time import time
+# Load data manipulation functions
+from sklearn.datasets import load_digits
+from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
 
 # Import SVM classifiers and feature map approximation algorithms
-from sklearn.svm import LinearSVC, SVC
-from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch
-from sklearn.pipeline import Pipeline
+from sklearn.svm import SVC, LinearSVC
 
 # Split data in train and test sets
 X, y = load_digits()["data"], load_digits()["target"]
diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py
index 018558ec1d833..c7d67fa2a545d 100644
--- a/benchmarks/bench_plot_randomized_svd.py
+++ b/benchmarks/bench_plot_randomized_svd.py
@@ -65,28 +65,29 @@
 # Author: Giorgio Patrini
 
-import numpy as np
-import scipy as sp
-import matplotlib.pyplot as plt
-
 import gc
+import os.path
 import pickle
+from collections import defaultdict
 from time import time
-from collections import defaultdict
-import os.path
-from sklearn.utils._arpack import _init_arpack_v0
-from sklearn.utils import gen_batches
-from sklearn.utils.validation import check_random_state
-from sklearn.utils.extmath import randomized_svd
-from sklearn.datasets import make_low_rank_matrix, make_sparse_uncorrelated
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy as sp
+
 from sklearn.datasets import (
+    fetch_20newsgroups_vectorized,
     fetch_lfw_people,
+    fetch_olivetti_faces,
     fetch_openml,
-    fetch_20newsgroups_vectorized,
-    fetch_olivetti_faces,
     fetch_rcv1,
+    make_low_rank_matrix,
+    make_sparse_uncorrelated,
 )
+from sklearn.utils import gen_batches
+from sklearn.utils._arpack import _init_arpack_v0
+from sklearn.utils.extmath import randomized_svd
+from sklearn.utils.validation import check_random_state
 
 try:
     import fbpca
diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py
index 40f91b57f9e23..52d22f6a9c8a0 100644
--- a/benchmarks/bench_plot_svd.py
+++ b/benchmarks/bench_plot_svd.py
@@ -3,13 +3,14 @@
 The data is mostly low rank but is a fat infinite tail.
 """
 import gc
+from collections import defaultdict
 from time import time
+
 import numpy as np
-from collections import defaultdict
+from scipy.linalg import svd
 
-from scipy.linalg import svd
+from sklearn.datasets import make_low_rank_matrix
 from sklearn.utils.extmath import randomized_svd
-from sklearn.datasets import make_low_rank_matrix
 
 def compute_bench(samples_range, features_range, n_iter=3, rank=50):
@@ -53,8 +54,8 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50):
 
 if __name__ == "__main__":
+    import matplotlib.pyplot as plt
     from mpl_toolkits.mplot3d import axes3d  # noqa register the 3d projection
-    import matplotlib.pyplot as plt
 
     samples_range = np.linspace(2, 1000, 4).astype(int)
     features_range = np.linspace(2, 1000, 4).astype(int)
diff --git a/benchmarks/bench_plot_ward.py b/benchmarks/bench_plot_ward.py
index fe5cee201dff4..696e833eede20 100644
--- a/benchmarks/bench_plot_ward.py
+++ b/benchmarks/bench_plot_ward.py
@@ -4,9 +4,9 @@
 import time
 
+import matplotlib.pyplot as plt
 import numpy as np
 from scipy.cluster import hierarchy
-import matplotlib.pyplot as plt
 
 from sklearn.cluster import AgglomerativeClustering
diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py
index bd8c62ecba484..89a4550944f3f 100644
--- a/benchmarks/bench_random_projections.py
+++ b/benchmarks/bench_random_projections.py
@@ -6,19 +6,19 @@
 Benchmarks for random projections.
 """
+import collections
 import gc
+import optparse
 import sys
-import optparse
 from datetime import datetime
-import collections
 
 import numpy as np
 import scipy.sparse as sp
 
 from sklearn import clone
 from sklearn.random_projection import (
+    GaussianRandomProjection,
     SparseRandomProjection,
-    GaussianRandomProjection,
     johnson_lindenstrauss_min_dim,
 )
diff --git a/benchmarks/bench_rcv1_logreg_convergence.py b/benchmarks/bench_rcv1_logreg_convergence.py
index a6a5e3b4fd450..e8fce1c414abf 100644
--- a/benchmarks/bench_rcv1_logreg_convergence.py
+++ b/benchmarks/bench_rcv1_logreg_convergence.py
@@ -3,14 +3,15 @@
 #
 # License: BSD 3 clause
 
+import gc
+import time
+
 import matplotlib.pyplot as plt
-from joblib import Memory
 import numpy as np
-import gc
-import time
 
+from joblib import Memory
+from sklearn.datasets import fetch_rcv1
 from sklearn.linear_model import LogisticRegression, SGDClassifier
-from sklearn.datasets import fetch_rcv1
 from sklearn.linear_model._sag import get_auto_step_size
 
 try:
diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py
index 997dde6bbb08d..581f7e3881e9e 100644
--- a/benchmarks/bench_saga.py
+++ b/benchmarks/bench_saga.py
@@ -4,25 +4,25 @@
 in using multinomial logistic regression in term of learning time.
 """
 import json
+import os
 import time
-import os
-from joblib import Parallel
-from sklearn.utils.fixes import delayed
 
 import matplotlib.pyplot as plt
 import numpy as np
 
+from joblib import Parallel
 from sklearn.datasets import (
+    fetch_20newsgroups_vectorized,
     fetch_rcv1,
+    load_digits,
     load_iris,
-    load_digits,
-    fetch_20newsgroups_vectorized,
 )
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelBinarizer, LabelEncoder
 from sklearn.utils.extmath import safe_sparse_dot, softmax
+from sklearn.utils.fixes import delayed
 
 def fit_single(
diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py
index f4e37c293a5d8..4f1041a6d1022 100644
--- a/benchmarks/bench_sample_without_replacement.py
+++ b/benchmarks/bench_sample_without_replacement.py
@@ -3,14 +3,14 @@
 """
 import gc
+import operator
+import optparse
+import random
 import sys
-import optparse
 from datetime import datetime
-import operator
 
 import matplotlib.pyplot as plt
 import numpy as np
-import random
 
 from sklearn.utils.random import sample_without_replacement
diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py
index 4b1b902795feb..47dd9e9fc758b 100644
--- a/benchmarks/bench_sgd_regression.py
+++ b/benchmarks/bench_sgd_regression.py
@@ -1,16 +1,15 @@
 # Author: Peter Prettenhofer
 # License: BSD 3 clause
-import numpy as np
-import matplotlib.pyplot as plt
-
 import gc
+from time import time
 
-from time import time
+import matplotlib.pyplot as plt
+import numpy as np
 
-from sklearn.linear_model import Ridge, SGDRegressor, ElasticNet
+from sklearn.datasets import make_regression
+from sklearn.linear_model import ElasticNet, Ridge, SGDRegressor
 from sklearn.metrics import mean_squared_error
-from sklearn.datasets import make_regression
 
 """
 Benchmark for SGD regression
diff --git a/benchmarks/bench_sparsify.py b/benchmarks/bench_sparsify.py
index 1832ca40c6ddb..f1aa482b8b732 100644
--- a/benchmarks/bench_sparsify.py
+++ b/benchmarks/bench_sparsify.py
@@ -43,8 +43,9 @@
 60 300 381409 1271.4 97.1 clf.predict(X_test_sparse)
 """
+import numpy as np
 from scipy.sparse import csr_matrix
-import numpy as np
+
 from sklearn.linear_model import SGDRegressor
 from sklearn.metrics import r2_score
diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py
index 50431f6908a0e..4f40e87f74e14 100644
--- a/benchmarks/bench_text_vectorizers.py
+++ b/benchmarks/bench_text_vectorizers.py
@@ -8,8 +8,8 @@
   * psutil (optional, but recommended)
 """
+import itertools
 import timeit
-import itertools
 
 import numpy as np
 import pandas as pd
@@ -18,8 +18,8 @@
 from sklearn.datasets import fetch_20newsgroups
 from sklearn.feature_extraction.text import (
     CountVectorizer,
+    HashingVectorizer,
     TfidfVectorizer,
-    HashingVectorizer,
 )
 
 n_repeat = 3
diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py
index a7d46aa729003..1809cb7c5e9c0 100644
--- a/benchmarks/bench_tree.py
+++ b/benchmarks/bench_tree.py
@@ -13,11 +13,12 @@
 training set, classify a sample and plot the time taken as a function of
 the number of dimensions.
 """
-import numpy as np
-import matplotlib.pyplot as plt
 import gc
 from datetime import datetime
 
+import matplotlib.pyplot as plt
+import numpy as np
+
 # to store the results
 scikit_classifier_results = []
 scikit_regressor_results = []
diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py
index 2ed1db8846eab..aa1a07a67ef44 100644
--- a/benchmarks/bench_tsne_mnist.py
+++ b/benchmarks/bench_tsne_mnist.py
@@ -7,18 +7,19 @@
 # License: BSD 3 clause
 
+import argparse
+import json
 import os
 import os.path as op
 from time import time
+
 import numpy as np
-import json
-import argparse
+
 from joblib import Memory
-
 from sklearn.datasets import fetch_openml
+from sklearn.decomposition import PCA
 from sklearn.manifold import TSNE
 from sklearn.neighbors import NearestNeighbors
-from sklearn.decomposition import PCA
 from sklearn.utils import check_array
 from sklearn.utils import shuffle as _shuffle
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
diff --git a/benchmarks/plot_tsne_mnist.py b/benchmarks/plot_tsne_mnist.py
index fff71eed0a26c..d32e3dd769d6a 100644
--- a/benchmarks/plot_tsne_mnist.py
+++ b/benchmarks/plot_tsne_mnist.py
@@ -1,9 +1,8 @@
+import argparse
+import os.path as op
+
 import matplotlib.pyplot as plt
 import numpy as np
-import os.path as op
-
-import argparse
-
 
 LOG_DIR = "mnist_tsne_output"
diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py
index 8d335e85e502a..68e198f8bdb38 100755
--- a/build_tools/circle/list_versions.py
+++ b/build_tools/circle/list_versions.py
@@ -4,7 +4,6 @@
 import json
 import re
 import sys
-
 from distutils.version import LooseVersion
 from urllib.request import urlopen
diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py
index d6ebfdd771c5f..dfeb1b92e954f 100644
--- a/build_tools/generate_authors_table.py
+++ b/build_tools/generate_authors_table.py
@@ -6,12 +6,13 @@
 The table should be updated for each new inclusion in the teams.
 Generating the table requires admin rights.
 """
+import getpass
 import sys
-import requests
-import getpass
 import time
+from os import path
 from pathlib import Path
-from os import path
+
+import requests
 
 print("user:", file=sys.stderr)
 user = input()
diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py
index 2549da5cdbf82..ef9bd77254fb5 100644
--- a/build_tools/github/check_wheels.py
+++ b/build_tools/github/check_wheels.py
@@ -1,8 +1,9 @@
 """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config."""
+import sys
+from pathlib import Path
+
 import yaml
-from pathlib import Path
-import sys
 
 gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml"
 with gh_wheel_path.open("r") as f:
diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py
index 89db98bff8b5d..bbc941d8f25f7 100644
--- a/build_tools/github/vendor.py
+++ b/build_tools/github/vendor.py
@@ -11,7 +11,6 @@
 import sys
 import textwrap
-
 TARGET_FOLDER = op.join("sklearn", ".libs")
 DISTRIBUTOR_INIT = op.join("sklearn", "_distributor_init.py")
 VCOMP140_SRC_PATH = "C:\\Windows\\System32\\vcomp140.dll"
diff --git a/doc/conf.py b/doc/conf.py
index a60f411568fdb..8276e8522f133 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -10,14 +10,15 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
+import os
+import re
 import sys
-import os
 import warnings
-import re
 from datetime import datetime
+from io import StringIO
+from pathlib import Path
+
 from packaging.version import parse
-from pathlib import Path
-from io import StringIO
 
 # If extensions (or modules to document with autodoc) are in another
 # directory, add these directories to sys.path here. If the directory
@@ -25,8 +26,8 @@
 # absolute, like shown here.
 sys.path.insert(0, os.path.abspath("sphinxext"))
 
+import sphinx_gallery
 from github_link import make_linkcode_resolve
-import sphinx_gallery
 from sphinx_gallery.sorting import ExampleTitleSortKey
 
 # -- General configuration ---------------------------------------------------
@@ -413,8 +414,8 @@ def __call__(self, filename):
 # enable experimental module so that experimental estimators can be
 # discovered properly by sphinx
+from sklearn.experimental import enable_halving_search_cv  # noqa
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.experimental import enable_halving_search_cv  # noqa
 
 def make_carousel_thumbs(app, exception):
diff --git a/doc/conftest.py b/doc/conftest.py
index 4edde566bf50c..10253efeabf98 100644
--- a/doc/conftest.py
+++ b/doc/conftest.py
@@ -1,16 +1,14 @@
 import os
-from os.path import exists
-from os.path import join
+import warnings
 from os import environ
-import warnings
-from sklearn.utils import IS_PYPY
-from sklearn.utils._testing import SkipTest
-from sklearn.utils._testing import check_skip_network
-from sklearn.utils.fixes import parse_version
+from os.path import exists, join
 from sklearn.datasets import get_data_home
 from sklearn.datasets._base import _pkl_filepath
 from sklearn.datasets._twenty_newsgroups import CACHE_NAME
+from sklearn.utils import IS_PYPY
+from sklearn.utils._testing import SkipTest, check_skip_network
+from sklearn.utils.fixes import parse_version
 
 def setup_labeled_faces():
diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py
index ac4545e2c8f14..bf51644b67116 100755
--- a/doc/sphinxext/allow_nan_estimators.py
+++ b/doc/sphinxext/allow_nan_estimators.py
@@ -1,15 +1,12 @@
-from sklearn.utils import all_estimators
-from sklearn.compose import ColumnTransformer
-from sklearn.pipeline import FeatureUnion
-from sklearn.decomposition import SparseCoder
-from sklearn.utils.estimator_checks import _construct_instance
-from sklearn.utils._testing import SkipTest
-from docutils import nodes
-import warnings
 from contextlib import suppress
 
+from docutils import nodes
 from docutils.parsers.rst import Directive
 
+from sklearn.utils import all_estimators
+from sklearn.utils._testing import SkipTest
+from sklearn.utils.estimator_checks import _construct_instance
+
 class AllowNanEstimators(Directive):
     @staticmethod
diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py
index 48e54d3fc785c..f851a12ec69ea 100644
--- a/doc/sphinxext/doi_role.py
+++ b/doc/sphinxext/doi_role.py
@@ -16,7 +16,6 @@
 """
 from docutils import nodes, utils
-
 from sphinx.util.nodes import split_explicit_title
diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py
index d3e43c8ed0f5e..3992d814b825e 100644
--- a/doc/sphinxext/github_link.py
+++ b/doc/sphinxext/github_link.py
@@ -1,9 +1,9 @@
-from operator import attrgetter
 import inspect
+import os
 import subprocess
-import os
 import sys
 from functools import partial
+from operator import attrgetter
 
 REVISION_CMD = "git rev-parse --short HEAD"
diff --git a/doc/tutorial/machine_learning_map/parse_path.py
b/doc/tutorial/machine_learning_map/parse_path.py index 4015c88fe8089..b1c68cec7f76b 100644 --- a/doc/tutorial/machine_learning_map/parse_path.py +++ b/doc/tutorial/machine_learning_map/parse_path.py @@ -6,105 +6,86 @@ """ try: - from pyparsing import ( - CaselessLiteral, - Combine, - Group, - Literal, - OneOrMore, - Optional, - ParseException, - Word, - ZeroOrMore, - nums, - oneOf, - ) + from pyparsing import (ParserElement, Literal, Word, CaselessLiteral, + Optional, Combine, Forward, ZeroOrMore, nums, oneOf, Group, ParseException, OneOrMore) except ImportError: import sys - sys.exit("pyparsing is required") - - -# ParserElement.enablePackrat() - + + +#ParserElement.enablePackrat() def Command(char): - """Case insensitive but case preserving""" + """ Case insensitive but case preserving""" return CaselessPreservingLiteral(char) - - + def Arguments(token): return Group(token) - - + + class CaselessPreservingLiteral(CaselessLiteral): - """Like CaselessLiteral, but returns the match as found - instead of as defined. + """ Like CaselessLiteral, but returns the match as found + instead of as defined. """ - - def __init__(self, matchString): + def __init__( self, matchString ): super().__init__(matchString.upper()) self.name = "'%s'" % matchString self.errmsg = "Expected " + self.name self.myException.msg = self.errmsg - def parseImpl(self, instring, loc, doActions=True): - test = instring[loc : loc + self.matchLen] + def parseImpl( self, instring, loc, doActions=True ): + test = instring[ loc:loc+self.matchLen ] if test.upper() == self.match: - return loc + self.matchLen, test - # ~ raise ParseException( instring, loc, self.errmsg ) + return loc+self.matchLen, test + #~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc exc.pstr = instring - raise exc - - + raise exc + def Sequence(token): - """A sequence of the token""" - return OneOrMore(token + maybeComma) - + """ A sequence of the token""" + return OneOrMore(token+maybeComma) digit_sequence = Word(nums) sign = oneOf("+ -") - def convertToFloat(s, loc, toks): try: return float(toks[0]) except BaseException as e: raise ParseException(loc, "invalid float format %s" % toks[0]) from e +exponent = CaselessLiteral("e")+Optional(sign)+Word(nums) -exponent = CaselessLiteral("e") + Optional(sign) + Word(nums) - -# note that almost all these fields are optional, -# and this can match almost anything. We rely on Pythons built-in -# float() function to clear out invalid values - loosely matching like this -# speeds up parsing quite a lot +#note that almost all these fields are optional, +#and this can match almost anything. 
We rely on Pythons built-in +#float() function to clear out invalid values - loosely matching like this +#speeds up parsing quite a lot floatingPointConstant = Combine( - Optional(sign) - + Optional(Word(nums)) - + Optional(Literal(".") + Optional(Word(nums))) - + Optional(exponent) + Optional(sign) + + Optional(Word(nums)) + + Optional(Literal(".") + Optional(Word(nums)))+ + Optional(exponent) ) floatingPointConstant.setParseAction(convertToFloat) number = floatingPointConstant -# same as FP constant but don't allow a - sign +#same as FP constant but don't allow a - sign nonnegativeNumber = Combine( - Optional(Word(nums)) - + Optional(Literal(".") + Optional(Word(nums))) - + Optional(exponent) + Optional(Word(nums)) + + Optional(Literal(".") + Optional(Word(nums)))+ + Optional(exponent) ) nonnegativeNumber.setParseAction(convertToFloat) coordinate = number -# comma or whitespace can separate values all over the place in SVG -maybeComma = Optional(Literal(",")).suppress() +#comma or whitespace can separate values all over the place in SVG +maybeComma = Optional(Literal(',')).suppress() coordinateSequence = Sequence(coordinate) @@ -114,34 +95,31 @@ def convertToFloat(s, loc, toks): coordinatePairPair = coordinatePair + maybeComma + coordinatePair coordinatePairPairSequence = Sequence(Group(coordinatePairPair)) -coordinatePairTriple = ( - coordinatePair + maybeComma + coordinatePair + maybeComma + coordinatePair -) +coordinatePairTriple = coordinatePair + maybeComma + coordinatePair + maybeComma + coordinatePair coordinatePairTripleSequence = Sequence(Group(coordinatePairTriple)) -# commands +#commands lineTo = Group(Command("L") + Arguments(coordinatePairSequence)) curve = Group(Command("C") + Arguments(coordinatePairSequence)) moveTo = Group(Command("M") + Arguments(coordinatePairSequence)) -closePath = Group(Command("Z")).setParseAction(lambda t: ("Z", (None,))) +closePath = Group(Command("Z")).setParseAction(lambda t: ('Z', (None,))) flag = oneOf("1 0").setParseAction(lambda t: bool(int((t[0])))) arcRadius = ( - nonnegativeNumber + maybeComma + nonnegativeNumber # rx # ry + nonnegativeNumber + maybeComma + #rx + nonnegativeNumber #ry ).setParseAction(tuple) arcFlags = (flag + maybeComma + flag).setParseAction(tuple) ellipticalArcArgument = Group( - arcRadius - + maybeComma - + number # rx, ry - + maybeComma - + arcFlags # rotation - + coordinatePair # large-arc-flag, sweep-flag # (x,y) + arcRadius + maybeComma + #rx, ry + number + maybeComma +#rotation + arcFlags + #large-arc-flag, sweep-flag + coordinatePair #(x,y) ) ellipticalArc = Group(Command("A") + Arguments(Sequence(ellipticalArcArgument))) @@ -152,75 +130,63 @@ def convertToFloat(s, loc, toks): smoothCurve = Group(Command("S") + Arguments(coordinatePairPairSequence)) -# curve = Group(Command("C") + Arguments(coordinatePairTripleSequence)) +#curve = Group(Command("C") + Arguments(coordinatePairTripleSequence)) horizontalLine = Group(Command("H") + Arguments(coordinateSequence)) verticalLine = Group(Command("V") + Arguments(coordinateSequence)) drawToCommand = ( - lineTo - | moveTo - | closePath - | ellipticalArc - | smoothQuadraticBezierCurveto - | quadraticBezierCurveto - | smoothCurve - | curve - | horizontalLine - | verticalLine -) + lineTo | moveTo | closePath | ellipticalArc | smoothQuadraticBezierCurveto | + quadraticBezierCurveto | smoothCurve | curve | horizontalLine | verticalLine + ) -# ~ number.debug = True +#~ number.debug = True moveToDrawToCommands = moveTo + ZeroOrMore(drawToCommand) path = 
ZeroOrMore(moveToDrawToCommands) path.keepTabs = True - def get_points(d): commands = path.parseString(d) points = [] currentset = None for command in commands: - if command[0] == "M" or command[0] == "m": + if command[0] == 'M' or command[0] == 'm': currentset = [] points.append(currentset) currentset.append(command[1][-1]) - elif command[0] == "L" or command[0] == "l": + elif command[0] == 'L' or command[0] == 'l': currentset.extend(command[1]) - elif command[0] == "C" or command[0] == "c": + elif command[0] == 'C' or command[0] == 'c': currentset.extend(command[1]) return points - if __name__ == "__main__": - s = ( - "M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 " - "L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z " - "M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 " - "L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 " - "L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 " - "L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 " - "L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 " - "L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 " - "L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 " - "L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 " - "L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 " - "L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 " - "L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 " - "L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z " - "M 313.89928,672.18665 L 316.08679,669.37414 L 320.61806,671.7179 " - "L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 " - "L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 " - "L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 " - "L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 " - "L 347.02443,695.31176 L 348.89944,699.53053 L 352.80571,702.03054 " - "L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 " - "L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 " - "L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 " - "L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 " - "L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 " - "L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 " - "L 329.99311,687.18672 z " - ) + s = ("M 242.96145,653.59282 L 244.83646,650.1553 L 247.02397,649.8428 " + "L 247.33647,650.62405 L 245.30521,653.59282 L 242.96145,653.59282 z " + "M 252.80525,649.99905 L 258.74278,652.49906 L 260.77404,652.18656 " + "L 262.33654,648.43654 L 261.71154,645.15528 L 257.64902,644.68653 " + "L 253.74275,646.40528 L 252.80525,649.99905 z M 282.49289,659.6866 " + "L 286.08665,664.99912 L 288.43041,664.68662 L 289.52417,664.21787 " + "L 290.93042,665.46787 L 294.52419,665.31162 L 295.4617,663.90537 " + "L 292.64918,662.18661 L 290.77417,658.59284 L 288.74291,655.15533 " + "L 283.11789,657.96784 L 282.49289,659.6866 z M 302.02423,668.28039 " + "L 303.27423,666.40538 L 307.8055,667.34288 L 308.43051,666.87413 " + "L 314.36803,667.49913 L 314.05553,668.74914 L 311.55552,670.15539 " + "L 307.33675,669.84289 L 302.02423,668.28039 z M 307.1805,673.28041 " + "L 309.05551,677.03043 L 312.02427,675.93667 L 312.33677,674.37416 " + "L 310.77427,672.3429 L 307.1805,672.0304 L 307.1805,673.28041 z " + "M 313.89928,672.18665 L 316.08679,669.37414 L 
320.61806,671.7179 " + "L 324.83683,672.81166 L 329.0556,675.46792 L 329.0556,677.34293 " + "L 325.61809,679.06169 L 320.93056,679.99919 L 318.5868,678.59293 " + "L 313.89928,672.18665 z M 329.99311,687.18672 L 331.55561,685.93672 " + "L 334.83688,687.49923 L 342.18066,690.93674 L 345.46193,692.968 " + "L 347.02443,695.31176 L 348.89944,699.53053 L 352.80571,702.03054 " + "L 352.49321,703.28055 L 348.74319,706.40556 L 344.68067,707.81182 " + "L 343.27442,707.18682 L 340.30565,708.90557 L 337.96189,712.03059 " + "L 335.77438,714.8431 L 334.05562,714.68685 L 330.61811,712.18684 " + "L 330.30561,707.81182 L 330.93061,705.46806 L 329.3681,699.99928 " + "L 327.33684,698.28052 L 327.18059,695.78051 L 329.3681,694.84301 " + "L 331.39936,691.87425 L 331.86811,690.93674 L 330.30561,689.21798 " + "L 329.99311,687.18672 z ") print(path.parseString(s)) diff --git a/doc/tutorial/machine_learning_map/pyparsing.py b/doc/tutorial/machine_learning_map/pyparsing.py index 94e20ff8ad3d2..a0f4a66c7291e 100644 --- a/doc/tutorial/machine_learning_map/pyparsing.py +++ b/doc/tutorial/machine_learning_map/pyparsing.py @@ -23,7 +23,8 @@ # # flake8: noqa -__doc__ = """ +__doc__ = \ +""" pyparsing module - Classes and methods to define and execute parsing grammars The pyparsing module is an alternative approach to creating and executing simple grammars, @@ -31,8 +32,8 @@ don't need to learn a new syntax for defining grammars or matching expressions - the parsing module provides a library of classes that you use to construct the grammar directly in Python. -Here is a program to parse "Hello, World!" (or any greeting of the form -C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements +Here is a program to parse "Hello, World!" (or any greeting of the form +C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements (L{'+'} operator gives L{And} expressions, strings are auto-converted to L{Literal} expressions):: @@ -64,18 +65,18 @@ class names, and the use of '+', '|' and '^' operators. __versionTime__ = "06 Mar 2017 02:06 UTC" __author__ = "Paul McGuire " -import collections +import string +from weakref import ref as wkref import copy -import pprint +import sys +import warnings import re import sre_constants -import string -import sys +import collections +import pprint import traceback import types -import warnings from datetime import datetime -from weakref import ref as wkref try: from _thread import RLock @@ -90,114 +91,27 @@ class names, and the use of '+', '|' and '^' operators. 
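The docstring above describes building grammars directly from Python operators, and the vendored parse_path.py module is a larger instance of the same style. A minimal standalone sketch of that combinator style, assuming the pyparsing package is importable:

    from pyparsing import Combine, Group, Literal, OneOrMore, Optional, Word, nums, oneOf

    sign = oneOf("+ -")
    # '+' chains expressions (And); Combine glues the matched pieces into one token
    number = Combine(Optional(sign) + Word(nums) + Optional(Literal(".") + Word(nums)))
    number.setParseAction(lambda t: float(t[0]))
    maybeComma = Optional(Literal(",")).suppress()
    coordinatePair = Group(number + maybeComma + number)
    path = OneOrMore(coordinatePair + maybeComma)

    print(path.parseString("1.5,2 3,4.25").asList())  # [[1.5, 2.0], [3.0, 4.25]]

Group keeps each coordinate pair nested in the results, which is the same trick parse_path.py uses to collect SVG subpaths.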
except ImportError: _OrderedDict = None -# ~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) +#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) __all__ = [ - "And", - "CaselessKeyword", - "CaselessLiteral", - "CharsNotIn", - "Combine", - "Dict", - "Each", - "Empty", - "FollowedBy", - "Forward", - "GoToColumn", - "Group", - "Keyword", - "LineEnd", - "LineStart", - "Literal", - "MatchFirst", - "NoMatch", - "NotAny", - "OneOrMore", - "OnlyOnce", - "Optional", - "Or", - "ParseBaseException", - "ParseElementEnhance", - "ParseException", - "ParseExpression", - "ParseFatalException", - "ParseResults", - "ParseSyntaxException", - "ParserElement", - "QuotedString", - "RecursiveGrammarException", - "Regex", - "SkipTo", - "StringEnd", - "StringStart", - "Suppress", - "Token", - "TokenConverter", - "White", - "Word", - "WordEnd", - "WordStart", - "ZeroOrMore", - "alphanums", - "alphas", - "alphas8bit", - "anyCloseTag", - "anyOpenTag", - "cStyleComment", - "col", - "commaSeparatedList", - "commonHTMLEntity", - "countedArray", - "cppStyleComment", - "dblQuotedString", - "dblSlashComment", - "delimitedList", - "dictOf", - "downcaseTokens", - "empty", - "hexnums", - "htmlComment", - "javaStyleComment", - "line", - "lineEnd", - "lineStart", - "lineno", - "makeHTMLTags", - "makeXMLTags", - "matchOnlyAtCol", - "matchPreviousExpr", - "matchPreviousLiteral", - "nestedExpr", - "nullDebugAction", - "nums", - "oneOf", - "opAssoc", - "operatorPrecedence", - "printables", - "punc8bit", - "pythonStyleComment", - "quotedString", - "removeQuotes", - "replaceHTMLEntity", - "replaceWith", - "restOfLine", - "sglQuotedString", - "srange", - "stringEnd", - "stringStart", - "traceParseAction", - "unicodeString", - "upcaseTokens", - "withAttribute", - "indentedBlock", - "originalTextFor", - "ungroup", - "infixNotation", - "locatedExpr", - "withClass", - "CloseMatch", - "tokenMap", - "pyparsing_common", +'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', +'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', +'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', +'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', +'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', +'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', +'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', +'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', +'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', +'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', +'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', +'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', +'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', +'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', +'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', +'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', +'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', +'CloseMatch', 'tokenMap', 'pyparsing_common', ] 
system_version = tuple(sys.version_info)[:3] @@ -209,19 +123,7 @@ class names, and the use of '+', '|' and '^' operators. _ustr = str # build list of single arg builtins, that can be used as parse actions - singleArgBuiltins = [ - sum, - len, - sorted, - reversed, - list, - tuple, - set, - any, - all, - min, - max, - ] + singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] else: _MAX_INT = sys.maxint @@ -229,10 +131,10 @@ class names, and the use of '+', '|' and '^' operators. def _ustr(obj): """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. + str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It + then < returns the unicode object | encodes it with the default encoding | ... >. """ - if isinstance(obj, unicode): + if isinstance(obj,unicode): return obj try: @@ -242,53 +144,47 @@ def _ustr(obj): except UnicodeEncodeError: # Else encode it - ret = unicode(obj).encode(sys.getdefaultencoding(), "xmlcharrefreplace") - xmlcharref = Regex(r"&#\d+;") - xmlcharref.setParseAction(lambda t: "\\u" + hex(int(t[0][2:-1]))[2:]) + ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') + xmlcharref = Regex(r'&#\d+;') + xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) return xmlcharref.transformString(ret) # build list of single arg builtins, tolerant of Python version, that can be used as parse actions singleArgBuiltins = [] import __builtin__ - for fname in "sum len sorted reversed list tuple set any all min max".split(): try: - singleArgBuiltins.append(getattr(__builtin__, fname)) + singleArgBuiltins.append(getattr(__builtin__,fname)) except AttributeError: continue - + _generatorType = type((y for y in range(1))) - - + def _xml_escape(data): """Escape &, <, >, ", ', etc. 
in a string of data.""" # ampersand must be replaced first - from_symbols = "&><\"'" - to_symbols = ("&" + s + ";" for s in "amp gt lt quot apos".split()) - for from_, to_ in zip(from_symbols, to_symbols): + from_symbols = '&><"\'' + to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) + for from_,to_ in zip(from_symbols, to_symbols): data = data.replace(from_, to_) return data - class _Constants(object): pass - -alphas = string.ascii_uppercase + string.ascii_lowercase -nums = "0123456789" -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -_bslash = chr(92) +alphas = string.ascii_uppercase + string.ascii_lowercase +nums = "0123456789" +hexnums = nums + "ABCDEFabcdef" +alphanums = alphas + nums +_bslash = chr(92) printables = "".join(c for c in string.printable if c not in string.whitespace) - class ParseBaseException(Exception): """base exception class for all parsing runtime exceptions""" - # Performance tuning: we construct a *lot* of these, so keep this # constructor as small and fast as possible - def __init__(self, pstr, loc=0, msg=None, elem=None): + def __init__( self, pstr, loc=0, msg=None, elem=None ): self.loc = loc if msg is None: self.msg = pstr @@ -302,53 +198,44 @@ def __init__(self, pstr, loc=0, msg=None, elem=None): @classmethod def _from_exception(cls, pe): """ - internal factory method to simplify creating one type of ParseException + internal factory method to simplify creating one type of ParseException from another - avoids having __init__ signature conflicts among subclasses """ return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) - def __getattr__(self, aname): + def __getattr__( self, aname ): """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text """ - if aname == "lineno": - return lineno(self.loc, self.pstr) - elif aname in ("col", "column"): - return col(self.loc, self.pstr) - elif aname == "line": - return line(self.loc, self.pstr) + if( aname == "lineno" ): + return lineno( self.loc, self.pstr ) + elif( aname in ("col", "column") ): + return col( self.loc, self.pstr ) + elif( aname == "line" ): + return line( self.loc, self.pstr ) else: raise AttributeError(aname) - def __str__(self): - return "%s (at char %d), (line:%d, col:%d)" % ( - self.msg, - self.loc, - self.lineno, - self.column, - ) - - def __repr__(self): + def __str__( self ): + return "%s (at char %d), (line:%d, col:%d)" % \ + ( self.msg, self.loc, self.lineno, self.column ) + def __repr__( self ): return _ustr(self) - - def markInputline(self, markerString=">!<"): + def markInputline( self, markerString = ">!<" ): """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. + the location of the exception with a special symbol. 
""" line_str = self.line line_column = self.column - 1 if markerString: - line_str = "".join( - (line_str[:line_column], markerString, line_str[line_column:]) - ) + line_str = "".join((line_str[:line_column], + markerString, line_str[line_column:])) return line_str.strip() - def __dir__(self): return "lineno col line".split() + dir(type(self)) - class ParseException(ParseBaseException): """ Exception thrown when parse expressions don't match class; @@ -356,74 +243,61 @@ class ParseException(ParseBaseException): - lineno - returns the line number of the exception text - col - returns the column number of the exception text - line - returns the line containing the exception text - + Example:: try: Word(nums).setName("integer").parseString("ABC") except ParseException as pe: print(pe) print("column: {}".format(pe.col)) - + prints:: Expected integer (at char 0), (line:1, col:1) column: 1 """ - pass - class ParseFatalException(ParseBaseException): """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" - + is found; stops all parsing immediately""" pass - class ParseSyntaxException(ParseFatalException): """just like L{ParseFatalException}, but thrown internally when an - L{ErrorStop} ('-' operator) indicates that parsing is to stop - immediately because an unbacktrackable syntax error has been found""" - + L{ErrorStop} ('-' operator) indicates that parsing is to stop + immediately because an unbacktrackable syntax error has been found""" pass - -# ~ class ReparseException(ParseBaseException): -# ~ """Experimental class - parse actions can raise this exception to cause -# ~ pyparsing to reparse the input string: -# ~ - with a modified input string, and/or -# ~ - with a modified start location -# ~ Set the values of the ReparseException in the constructor, and raise the -# ~ exception in a parse action to cause pyparsing to use the new string/location. -# ~ Setting the values as None causes no change to be made. -# ~ """ -# ~ def __init_( self, newstring, restartLoc ): -# ~ self.newParseText = newstring -# ~ self.reparseLoc = restartLoc - +#~ class ReparseException(ParseBaseException): + #~ """Experimental class - parse actions can raise this exception to cause + #~ pyparsing to reparse the input string: + #~ - with a modified input string, and/or + #~ - with a modified start location + #~ Set the values of the ReparseException in the constructor, and raise the + #~ exception in a parse action to cause pyparsing to use the new string/location. + #~ Setting the values as None causes no change to be made. 
+ #~ """ + #~ def __init_( self, newstring, restartLoc ): + #~ self.newParseText = newstring + #~ self.reparseLoc = restartLoc class RecursiveGrammarException(Exception): """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive""" - - def __init__(self, parseElementList): + def __init__( self, parseElementList ): self.parseElementTrace = parseElementList - def __str__(self): + def __str__( self ): return "RecursiveGrammarException: %s" % self.parseElementTrace - class _ParseResultsWithOffset(object): - def __init__(self, p1, p2): - self.tup = (p1, p2) - - def __getitem__(self, i): + def __init__(self,p1,p2): + self.tup = (p1,p2) + def __getitem__(self,i): return self.tup[i] - def __repr__(self): return repr(self.tup[0]) - - def setOffset(self, i): - self.tup = (self.tup[0], i) - + def setOffset(self,i): + self.tup = (self.tup[0],i) class ParseResults(object): """ @@ -434,8 +308,8 @@ class ParseResults(object): Example:: integer = Word(nums) - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' + date_str = (integer.setResultsName("year") + '/' + + integer.setResultsName("month") + '/' + integer.setResultsName("day")) # equivalent form: # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") @@ -464,8 +338,7 @@ def test(s, fn=repr): - month: 12 - year: 1999 """ - - def __new__(cls, toklist=None, name=None, asList=True, modal=True): + def __new__(cls, toklist=None, name=None, asList=True, modal=True ): if isinstance(toklist, cls): return toklist retobj = object.__new__(cls) @@ -474,9 +347,7 @@ def __new__(cls, toklist=None, name=None, asList=True, modal=True): # Performance tuning: we construct a *lot* of these, so keep this # constructor as small and fast as possible - def __init__( - self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance - ): + def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ): if self.__doinit: self.__doinit = False self.__name = None @@ -497,109 +368,89 @@ def __init__( if name is not None and name: if not modal: self.__accumNames[name] = 0 - if isinstance(name, int): - name = _ustr( - name - ) # will always return a str, but use _ustr for consistency + if isinstance(name,int): + name = _ustr(name) # will always return a str, but use _ustr for consistency self.__name = name - if not ( - isinstance(toklist, (type(None), basestring, list)) - and toklist in (None, "", []) - ): - if isinstance(toklist, basestring): - toklist = [toklist] + if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): + if isinstance(toklist,basestring): + toklist = [ toklist ] if asList: - if isinstance(toklist, ParseResults): - self[name] = _ParseResultsWithOffset(toklist.copy(), 0) + if isinstance(toklist,ParseResults): + self[name] = _ParseResultsWithOffset(toklist.copy(),0) else: - self[name] = _ParseResultsWithOffset( - ParseResults(toklist[0]), 0 - ) + self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) self[name].__name = name else: try: self[name] = toklist[0] - except (KeyError, TypeError, IndexError): + except (KeyError,TypeError,IndexError): self[name] = toklist - def __getitem__(self, i): - if isinstance(i, (int, slice)): + def __getitem__( self, i ): + if isinstance( i, (int,slice) ): return self.__toklist[i] else: if i not in self.__accumNames: return self.__tokdict[i][-1][0] else: - return ParseResults([v[0] for v in self.__tokdict[i]]) + return ParseResults([ v[0] for v in 
self.__tokdict[i] ]) - def __setitem__(self, k, v, isinstance=isinstance): - if isinstance(v, _ParseResultsWithOffset): - self.__tokdict[k] = self.__tokdict.get(k, list()) + [v] + def __setitem__( self, k, v, isinstance=isinstance ): + if isinstance(v,_ParseResultsWithOffset): + self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] sub = v[0] - elif isinstance(k, (int, slice)): + elif isinstance(k,(int,slice)): self.__toklist[k] = v sub = v else: - self.__tokdict[k] = self.__tokdict.get(k, list()) + [ - _ParseResultsWithOffset(v, 0) - ] + self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] sub = v - if isinstance(sub, ParseResults): + if isinstance(sub,ParseResults): sub.__parent = wkref(self) - def __delitem__(self, i): - if isinstance(i, (int, slice)): - mylen = len(self.__toklist) + def __delitem__( self, i ): + if isinstance(i,(int,slice)): + mylen = len( self.__toklist ) del self.__toklist[i] # convert int to slice if isinstance(i, int): if i < 0: i += mylen - i = slice(i, i + 1) + i = slice(i, i+1) # get removed indices removed = list(range(*i.indices(mylen))) removed.reverse() # fixup indices in token dictionary - for name, occurrences in self.__tokdict.items(): + for name,occurrences in self.__tokdict.items(): for j in removed: for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset( - value, position - (position > j) - ) + occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) else: del self.__tokdict[i] - def __contains__(self, k): + def __contains__( self, k ): return k in self.__tokdict - def __len__(self): - return len(self.__toklist) - - def __bool__(self): - return not not self.__toklist - + def __len__( self ): return len( self.__toklist ) + def __bool__(self): return ( not not self.__toklist ) __nonzero__ = __bool__ - - def __iter__(self): - return iter(self.__toklist) - - def __reversed__(self): - return iter(self.__toklist[::-1]) - - def _iterkeys(self): + def __iter__( self ): return iter( self.__toklist ) + def __reversed__( self ): return iter( self.__toklist[::-1] ) + def _iterkeys( self ): if hasattr(self.__tokdict, "iterkeys"): return self.__tokdict.iterkeys() else: return iter(self.__tokdict) - def _itervalues(self): + def _itervalues( self ): return (self[k] for k in self._iterkeys()) - - def _iteritems(self): + + def _iteritems( self ): return ((k, self[k]) for k in self._iterkeys()) if PY_3: - keys = _iterkeys + keys = _iterkeys """Returns an iterator of all named result keys (Python 3.x only).""" values = _itervalues @@ -618,32 +469,32 @@ def _iteritems(self): iteritems = _iteritems """Returns an iterator of all named result key-value tuples (Python 2.x only).""" - def keys(self): + def keys( self ): """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" return list(self.iterkeys()) - def values(self): + def values( self ): """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" return list(self.itervalues()) - - def items(self): + + def items( self ): """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" return list(self.iteritems()) - def haskeys(self): + def haskeys( self ): """Since keys() returns an iterator, this method is helpful in bypassing - code that looks for the existence of any defined results names.""" + code that looks for the existence of any defined results names.""" return bool(self.__tokdict) - - def pop(self, *args, 
**kwargs): + + def pop( self, *args, **kwargs): """ Removes and returns item at specified index (default=C{last}). Supports both C{list} and C{dict} semantics for C{pop()}. If passed no argument or an integer argument, it will use C{list} semantics - and pop tokens from the list of parsed tokens. If passed a + and pop tokens from the list of parsed tokens. If passed a non-integer argument (most likely a string), it will use C{dict} - semantics and pop the corresponding value from any defined - results names. A second default return value argument is + semantics and pop the corresponding value from any defined + results names. A second default return value argument is supported, just as in C{dict.pop()}. Example:: @@ -671,12 +522,14 @@ def remove_LABEL(tokens): """ if not args: args = [-1] - for k, v in kwargs.items(): - if k == "default": + for k,v in kwargs.items(): + if k == 'default': args = (args[0], v) else: raise TypeError("pop() got an unexpected keyword argument '%s'" % k) - if isinstance(args[0], int) or len(args) == 1 or args[0] in self: + if (isinstance(args[0], int) or + len(args) == 1 or + args[0] in self): index = args[0] ret = self[index] del self[index] @@ -692,10 +545,10 @@ def get(self, key, defaultValue=None): C{defaultValue} is specified. Similar to C{dict.get()}. - + Example:: integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") result = date_str.parseString("1999/12/31") print(result.get("year")) # -> '1999' @@ -707,10 +560,10 @@ def get(self, key, defaultValue=None): else: return defaultValue - def insert(self, index, insStr): + def insert( self, index, insStr ): """ Inserts new element at location index in the list of parsed tokens. - + Similar to C{list.insert()}. Example:: @@ -723,19 +576,17 @@ def insert_locn(locn, tokens): """ self.__toklist.insert(index, insStr) # fixup indices in token dictionary - for name, occurrences in self.__tokdict.items(): + for name,occurrences in self.__tokdict.items(): for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset( - value, position + (position > index) - ) + occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) - def append(self, item): + def append( self, item ): """ Add single element to end of ParseResults list of elements. Example:: print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] - + # use a parse action to compute the sum of the parsed integers, and add it to the end def append_sum(tokens): tokens.append(sum(map(int, tokens))) @@ -743,13 +594,13 @@ def append_sum(tokens): """ self.__toklist.append(item) - def extend(self, itemseq): + def extend( self, itemseq ): """ Add sequence of elements to end of ParseResults list of elements. Example:: patt = OneOrMore(Word(alphas)) - + # use a parse action to append the reverse of the matched strings, to make a palindrome def make_palindrome(tokens): tokens.extend(reversed([t[::-1] for t in tokens])) @@ -761,84 +612,74 @@ def make_palindrome(tokens): else: self.__toklist.extend(itemseq) - def clear(self): + def clear( self ): """ Clear all elements and results names. 
""" del self.__toklist[:] self.__tokdict.clear() - def __getattr__(self, name): + def __getattr__( self, name ): try: return self[name] except KeyError: return "" - + if name in self.__tokdict: if name not in self.__accumNames: return self.__tokdict[name][-1][0] else: - return ParseResults([v[0] for v in self.__tokdict[name]]) + return ParseResults([ v[0] for v in self.__tokdict[name] ]) else: return "" - def __add__(self, other): + def __add__( self, other ): ret = self.copy() ret += other return ret - def __iadd__(self, other): + def __iadd__( self, other ): if other.__tokdict: offset = len(self.__toklist) - addoffset = lambda a: offset if a < 0 else a + offset + addoffset = lambda a: offset if a<0 else a+offset otheritems = other.__tokdict.items() - otherdictitems = [ - (k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) - for (k, vlist) in otheritems - for v in vlist - ] - for k, v in otherdictitems: + otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) + for (k,vlist) in otheritems for v in vlist] + for k,v in otherdictitems: self[k] = v - if isinstance(v[0], ParseResults): + if isinstance(v[0],ParseResults): v[0].__parent = wkref(self) - + self.__toklist += other.__toklist - self.__accumNames.update(other.__accumNames) + self.__accumNames.update( other.__accumNames ) return self def __radd__(self, other): - if isinstance(other, int) and other == 0: + if isinstance(other,int) and other == 0: # useful for merging many ParseResults using sum() builtin return self.copy() else: # this may raise a TypeError - so be it return other + self + + def __repr__( self ): + return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) - def __repr__(self): - return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict)) - - def __str__(self): - return ( - "[" - + ", ".join( - _ustr(i) if isinstance(i, ParseResults) else repr(i) - for i in self.__toklist - ) - + "]" - ) + def __str__( self ): + return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' - def _asStringList(self, sep=""): + def _asStringList( self, sep='' ): out = [] for item in self.__toklist: if out and sep: out.append(sep) - if isinstance(item, ParseResults): + if isinstance( item, ParseResults ): out += item._asStringList() else: - out.append(_ustr(item)) + out.append( _ustr(item) ) return out - def asList(self): + def asList( self ): """ Returns the parse results as a nested list of matching tokens, all converted to strings. @@ -847,27 +688,24 @@ def asList(self): result = patt.parseString("sldkj lsdkj sldkj") # even though the result prints in string-like form, it is actually a pyparsing ParseResults print(type(result), result) # -> ['sldkj', 'lsdkj', 'sldkj'] - + # Use asList() to create an actual list result_list = result.asList() print(type(result_list), result_list) # -> ['sldkj', 'lsdkj', 'sldkj'] """ - return [ - res.asList() if isinstance(res, ParseResults) else res - for res in self.__toklist - ] + return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist] - def asDict(self): + def asDict( self ): """ Returns the named parse results as a nested dictionary. 
Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - + result = date_str.parseString('12/31/1999') print(type(result), repr(result)) # -> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) - + result_dict = result.asDict() print(type(result_dict), repr(result_dict)) # -> {'day': '1999', 'year': '12', 'month': '31'} @@ -880,7 +718,7 @@ def asDict(self): item_fn = self.items else: item_fn = self.iteritems - + def toItem(obj): if isinstance(obj, ParseResults): if obj.haskeys(): @@ -889,29 +727,28 @@ def toItem(obj): return [toItem(v) for v in obj] else: return obj + + return dict((k,toItem(v)) for k,v in item_fn()) - return dict((k, toItem(v)) for k, v in item_fn()) - - def copy(self): + def copy( self ): """ Returns a new copy of a C{ParseResults} object. """ - ret = ParseResults(self.__toklist) + ret = ParseResults( self.__toklist ) ret.__tokdict = self.__tokdict.copy() ret.__parent = self.__parent - ret.__accumNames.update(self.__accumNames) + ret.__accumNames.update( self.__accumNames ) ret.__name = self.__name return ret - def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True): + def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): """ (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. """ nl = "\n" out = [] - namedItems = dict( - (v[1], k) for (k, vlist) in self.__tokdict.items() for v in vlist - ) + namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() + for v in vlist) nextLevelIndent = indent + " " # collapse out indents if formatting is not desired @@ -933,28 +770,20 @@ def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True): else: selfTag = "ITEM" - out += [nl, indent, "<", selfTag, ">"] + out += [ nl, indent, "<", selfTag, ">" ] - for i, res in enumerate(self.__toklist): - if isinstance(res, ParseResults): + for i,res in enumerate(self.__toklist): + if isinstance(res,ParseResults): if i in namedItems: - out += [ - res.asXML( - namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted, - ) - ] + out += [ res.asXML(namedItems[i], + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] else: - out += [ - res.asXML( - None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted, - ) - ] + out += [ res.asXML(None, + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] else: # individual token, see if there is a name for it resTag = None @@ -966,42 +795,34 @@ def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True): else: resTag = "ITEM" xmlBodyText = _xml_escape(_ustr(res)) - out += [ - nl, - nextLevelIndent, - "<", - resTag, - ">", - xmlBodyText, - "", - ] - - out += [nl, indent, ""] + out += [ nl, nextLevelIndent, "<", resTag, ">", + xmlBodyText, + "" ] + + out += [ nl, indent, "" ] return "".join(out) - def __lookup(self, sub): - for k, vlist in self.__tokdict.items(): - for v, loc in vlist: + def __lookup(self,sub): + for k,vlist in self.__tokdict.items(): + for v,loc in vlist: if sub is v: return k return None def getName(self): r""" - Returns the results name for this token expression. Useful when several + Returns the results name for this token expression. Useful when several different expressions might match at a particular location. 
Example:: integer = Word(nums) ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") house_number_expr = Suppress('#') + Word(nums, alphanums) - user_data = (Group(house_number_expr)("house_number") + user_data = (Group(house_number_expr)("house_number") | Group(ssn_expr)("ssn") | Group(integer)("age")) user_info = OneOrMore(user_data) - + result = user_info.parseString("22 111-22-3333 #221B") for item in result: print(item.getName(), ':', item[0]) @@ -1018,16 +839,14 @@ def getName(self): return par.__lookup(self) else: return None - elif ( - len(self) == 1 - and len(self.__tokdict) == 1 - and next(iter(self.__tokdict.values()))[0][1] in (0, -1) - ): + elif (len(self) == 1 and + len(self.__tokdict) == 1 and + next(iter(self.__tokdict.values()))[0][1] in (0,-1)): return next(iter(self.__tokdict.keys())) else: return None - def dump(self, indent="", depth=0, full=True): + def dump(self, indent='', depth=0, full=True): """ Diagnostic method for listing out the contents of a C{ParseResults}. Accepts an optional C{indent} argument so that this string can be embedded @@ -1036,7 +855,7 @@ def dump(self, indent="", depth=0, full=True): Example:: integer = Word(nums) date_str = integer("year") + '/' + integer("month") + '/' + integer("day") - + result = date_str.parseString('12/31/1999') print(result.dump()) prints:: @@ -1046,56 +865,36 @@ def dump(self, indent="", depth=0, full=True): - year: 12 """ out = [] - NL = "\n" - out.append(indent + _ustr(self.asList())) + NL = '\n' + out.append( indent+_ustr(self.asList()) ) if full: if self.haskeys(): - items = sorted((str(k), v) for k, v in self.items()) - for k, v in items: + items = sorted((str(k), v) for k,v in self.items()) + for k,v in items: if out: out.append(NL) - out.append("%s%s- %s: " % (indent, (" " * depth), k)) - if isinstance(v, ParseResults): + out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) + if isinstance(v,ParseResults): if v: - out.append(v.dump(indent, depth + 1)) + out.append( v.dump(indent,depth+1) ) else: out.append(_ustr(v)) else: out.append(repr(v)) - elif any(isinstance(vv, ParseResults) for vv in self): + elif any(isinstance(vv,ParseResults) for vv in self): v = self - for i, vv in enumerate(v): - if isinstance(vv, ParseResults): - out.append( - "\n%s%s[%d]:\n%s%s%s" - % ( - indent, - (" " * (depth)), - i, - indent, - (" " * (depth + 1)), - vv.dump(indent, depth + 1), - ) - ) + for i,vv in enumerate(v): + if isinstance(vv,ParseResults): + out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) else: - out.append( - "\n%s%s[%d]:\n%s%s%s" - % ( - indent, - (" " * (depth)), - i, - indent, - (" " * (depth + 1)), - _ustr(vv), - ) - ) - + out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) + return "".join(out) def pprint(self, *args, **kwargs): """ Pretty-printer for parsed results as a list, using the C{pprint} module. - Accepts additional positional or keyword args as defined for the + Accepts additional positional or keyword args as defined for the C{pprint.pprint} method. 
(U{https://docs.python.org/3/library/pprint.html#pprint.pprint}) Example:: @@ -1117,19 +916,18 @@ def pprint(self, *args, **kwargs): # add support for pickle protocol def __getstate__(self): - return ( - self.__toklist, - ( - self.__tokdict.copy(), - self.__parent is not None and self.__parent() or None, - self.__accumNames, - self.__name, - ), - ) - - def __setstate__(self, state): + return ( self.__toklist, + ( self.__tokdict.copy(), + self.__parent is not None and self.__parent() or None, + self.__accumNames, + self.__name ) ) + + def __setstate__(self,state): self.__toklist = state[0] - (self.__tokdict, par, inAccumNames, self.__name) = state[1] + (self.__tokdict, + par, + inAccumNames, + self.__name) = state[1] self.__accumNames = {} self.__accumNames.update(inAccumNames) if par is not None: @@ -1141,128 +939,109 @@ def __getnewargs__(self): return self.__toklist, self.__name, self.__asList, self.__modal def __dir__(self): - return dir(type(self)) + list(self.keys()) - + return (dir(type(self)) + list(self.keys())) collections.MutableMapping.register(ParseResults) - -def col(loc, strg): +def col (loc,strg): """Returns current column within a string, counting newlines as line separators. - The first column is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ + The first column is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing C{}s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ s = strg - return 1 if 0 < loc < len(s) and s[loc - 1] == "\n" else loc - s.rfind("\n", 0, loc) - + return 1 if 0} for more information - on parsing strings containing C{}s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return strg.count("\n", 0, loc) + 1 - - -def line(loc, strg): - """Returns the line of text containing loc within a string, counting newlines as line separators.""" + The first line is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing C{}s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + return strg.count("\n",0,loc) + 1 + +def line( loc, strg ): + """Returns the line of text containing loc within a string, counting newlines as line separators. 
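The position helpers being reformatted here (col, lineno, line) are easier to follow with concrete values. A quick sketch, assuming the standalone pyparsing package, which exports all three:

    from pyparsing import col, line, lineno

    text = "abc\ndef\nghi"
    loc = text.index("e")     # character 5, on the second line
    print(lineno(loc, text))  # 2     (lines count from 1)
    print(col(loc, text))     # 2     (columns count from 1)
    print(line(loc, text))    # 'def' (the full line containing loc)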
+ """ lastCR = strg.rfind("\n", 0, loc) nextCR = strg.find("\n", loc) if nextCR >= 0: - return strg[lastCR + 1 : nextCR] + return strg[lastCR+1:nextCR] else: - return strg[lastCR + 1 :] - - -def _defaultStartDebugAction(instring, loc, expr): - print( - ( - "Match " - + _ustr(expr) - + " at loc " - + _ustr(loc) - + "(%d,%d)" % (lineno(loc, instring), col(loc, instring)) - ) - ) - + return strg[lastCR+1:] -def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks): - print("Matched " + _ustr(expr) + " -> " + str(toks.asList())) +def _defaultStartDebugAction( instring, loc, expr ): + print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) +def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): + print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) -def _defaultExceptionDebugAction(instring, loc, expr, exc): - print("Exception raised:" + _ustr(exc)) - +def _defaultExceptionDebugAction( instring, loc, expr, exc ): + print ("Exception raised:" + _ustr(exc)) def nullDebugAction(*args): """'Do-nothing' debug action, to suppress debugging output during parsing.""" pass - # Only works on Python 3.x - nonlocal is toxic to Python 2 installs -# ~ 'decorator to trim function calls to match the arity of the target' -# ~ def _trim_arity(func, maxargs=3): -# ~ if func in singleArgBuiltins: -# ~ return lambda s,l,t: func(t) -# ~ limit = 0 -# ~ foundArity = False -# ~ def wrapper(*args): -# ~ nonlocal limit,foundArity -# ~ while 1: -# ~ try: -# ~ ret = func(*args[limit:]) -# ~ foundArity = True -# ~ return ret -# ~ except TypeError: -# ~ if limit == maxargs or foundArity: -# ~ raise -# ~ limit += 1 -# ~ continue -# ~ return wrapper +#~ 'decorator to trim function calls to match the arity of the target' +#~ def _trim_arity(func, maxargs=3): + #~ if func in singleArgBuiltins: + #~ return lambda s,l,t: func(t) + #~ limit = 0 + #~ foundArity = False + #~ def wrapper(*args): + #~ nonlocal limit,foundArity + #~ while 1: + #~ try: + #~ ret = func(*args[limit:]) + #~ foundArity = True + #~ return ret + #~ except TypeError: + #~ if limit == maxargs or foundArity: + #~ raise + #~ limit += 1 + #~ continue + #~ return wrapper # this version is Python 2.x-3.x cross-compatible -"decorator to trim function calls to match the arity of the target" - - +'decorator to trim function calls to match the arity of the target' def _trim_arity(func, maxargs=2): if func in singleArgBuiltins: - return lambda s, l, t: func(t) + return lambda s,l,t: func(t) limit = [0] foundArity = [False] - + def extract_stack(limit=0): offset = -2 - frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset] + frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset] return [(frame_summary.filename, frame_summary.lineno)] - def extract_tb(tb, limit=0): frames = traceback.extract_tb(tb, limit=limit) frame_summary = frames[-1] return [(frame_summary.filename, frame_summary.lineno)] - - # synthesize what would be returned by traceback.extract_stack at the call to + + # synthesize what would be returned by traceback.extract_stack at the call to # user's parse action 'func', so that we don't incur call penalty at parse time - + LINE_DIFF = 6 - # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND + # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! 
this_line = extract_stack(limit=2)[-1] - pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF) + pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF) def wrapper(*args): while 1: try: - ret = func(*args[limit[0] :]) + ret = func(*args[limit[0]:]) foundArity[0] = True return ret except TypeError: @@ -1285,29 +1064,28 @@ def wrapper(*args): # copy func name to wrapper for sensible debug output func_name = "" try: - func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) + func_name = getattr(func, '__name__', + getattr(func, '__class__').__name__) except Exception: func_name = str(func) wrapper.__name__ = func_name return wrapper - class ParserElement(object): """Abstract base level parser element class.""" - DEFAULT_WHITE_CHARS = " \n\t\r" verbose_stacktrace = False @staticmethod - def setDefaultWhitespaceChars(chars): + def setDefaultWhitespaceChars( chars ): r""" Overrides the default whitespace chars Example:: # default whitespace chars are space, and newline OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] - + # change to just treat newline as significant ParserElement.setDefaultWhitespaceChars(" \t") OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] @@ -1318,84 +1096,84 @@ def setDefaultWhitespaceChars(chars): def inlineLiteralsUsing(cls): """ Set class to be used for inclusion of string literals into a parser. - + Example:: # default literal class used is Literal integer = Word(nums) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] # change to Suppress ParserElement.inlineLiteralsUsing(Suppress) - date_str = integer("year") + '/' + integer("month") + '/' + integer("day") + date_str = integer("year") + '/' + integer("month") + '/' + integer("day") date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] """ ParserElement._literalStringClass = cls - def __init__(self, savelist=False): + def __init__( self, savelist=False ): self.parseAction = list() self.failAction = None - # ~ self.name = "" # don't define self.name, let subclasses try/except upcall + #~ self.name = "" # don't define self.name, let subclasses try/except upcall self.strRepr = None self.resultsName = None self.saveAsList = savelist self.skipWhitespace = True self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS self.copyDefaultWhiteChars = True - self.mayReturnEmpty = False # used when checking for left-recursion + self.mayReturnEmpty = False # used when checking for left-recursion self.keepTabs = False self.ignoreExprs = list() self.debug = False self.streamlined = False - self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index + self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index self.errmsg = "" - self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) - self.debugActions = (None, None, None) # custom debug actions + self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) + self.debugActions = ( None, None, None ) #custom debug actions self.re = None - self.callPreparse = True # used to avoid redundant calls to preParse + self.callPreparse = True # used to avoid redundant calls to preParse self.callDuringTry = False - def copy(self): 
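The C{_trim_arity} wrapper restored above is what lets parse actions be declared with anywhere from 0 to 3 parameters while pyparsing always conceptually calls C{fn(s,loc,toks)}. A hedged sketch of the behavior it enables (illustrative grammar, pyparsing 2.x API)::

    import pyparsing as pp

    integer = pp.Word(pp.nums)

    # a three-argument action works ...
    integer.setParseAction(lambda s, loc, toks: int(toks[0]))
    print(integer.parseString("42"))   # -> [42]

    # ... and so does a one-argument action; setParseAction replaces the old one
    integer.setParseAction(lambda toks: int(toks[0]))
    print(integer.parseString("42"))   # -> [42]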
+ def copy( self ): """ Make a copy of this C{ParserElement}. Useful for defining different parse actions for the same parsing pattern, using copies of the original parse element. - + Example:: integer = Word(nums).setParseAction(lambda toks: int(toks[0])) integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") - + print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) prints:: [5120, 100, 655360, 268435456] Equivalent form of C{expr.copy()} is just C{expr()}:: integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") """ - cpy = copy.copy(self) + cpy = copy.copy( self ) cpy.parseAction = self.parseAction[:] cpy.ignoreExprs = self.ignoreExprs[:] if self.copyDefaultWhiteChars: cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS return cpy - def setName(self, name): + def setName( self, name ): """ Define name for this expression, makes debugging and exception messages clearer. - + Example:: Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) """ self.name = name self.errmsg = "Expected " + self.name - if hasattr(self, "exception"): + if hasattr(self,"exception"): self.exception.msg = self.errmsg return self - def setResultsName(self, name, listAllMatches=False): + def setResultsName( self, name, listAllMatches=False ): """ Define name for referencing matching tokens as a nested attribute of the returned parse results. @@ -1404,12 +1182,12 @@ def setResultsName(self, name, listAllMatches=False): integer, and reference it in multiple places with different names. You can also set results names using the abbreviated syntax, - C{expr("name")} in place of C{expr.setResultsName("name")} - + C{expr("name")} in place of C{expr.setResultsName("name")} - see L{I{__call__}<__call__>}. Example:: - date_str = (integer.setResultsName("year") + '/' - + integer.setResultsName("month") + '/' + date_str = (integer.setResultsName("year") + '/' + + integer.setResultsName("month") + '/' + integer.setResultsName("day")) # equivalent form: @@ -1418,33 +1196,30 @@ def setResultsName(self, name, listAllMatches=False): newself = self.copy() if name.endswith("*"): name = name[:-1] - listAllMatches = True + listAllMatches=True newself.resultsName = name newself.modalResults = not listAllMatches return newself - def setBreak(self, breakFlag=True): + def setBreak(self,breakFlag = True): """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set C{breakFlag} to True to enable, False to - disable. + about to be parsed. Set C{breakFlag} to True to enable, False to + disable. """ if breakFlag: _parseMethod = self._parse - def breaker(instring, loc, doActions=True, callPreParse=True): import pdb - pdb.set_trace() - return _parseMethod(instring, loc, doActions, callPreParse) - + return _parseMethod( instring, loc, doActions, callPreParse ) breaker._originalParseMethod = _parseMethod self._parse = breaker else: - if hasattr(self._parse, "_originalParseMethod"): + if hasattr(self._parse,"_originalParseMethod"): self._parse = self._parse._originalParseMethod return self - def setParseAction(self, *fns, **kwargs): + def setParseAction( self, *fns, **kwargs ): """ Define one or more actions to perform when successfully matching parse element definition. 
Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, @@ -1464,7 +1239,7 @@ def setParseAction(self, *fns, **kwargs): on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. - + Example:: integer = Word(nums) date_str = integer + '/' + integer + '/' + integer @@ -1482,10 +1257,10 @@ def setParseAction(self, *fns, **kwargs): self.callDuringTry = kwargs.get("callDuringTry", False) return self - def addParseAction(self, *fns, **kwargs): + def addParseAction( self, *fns, **kwargs ): """ Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}}. - + See examples in L{I{copy}}. """ self.parseAction += list(map(_trim_arity, list(fns))) @@ -1493,14 +1268,14 @@ def addParseAction(self, *fns, **kwargs): return self def addCondition(self, *fns, **kwargs): - """Add a boolean predicate function to expression's list of parse actions. See - L{I{setParseAction}} for function call signatures. Unlike C{setParseAction}, + """Add a boolean predicate function to expression's list of parse actions. See + L{I{setParseAction}} for function call signatures. Unlike C{setParseAction}, functions passed to C{addCondition} need to return boolean success/fail of the condition. Optional keyword arguments: - message = define a custom message to be used in the raised exception - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException - + Example:: integer = Word(nums).setParseAction(lambda toks: int(toks[0])) year_int = integer.copy() @@ -1512,44 +1287,42 @@ def addCondition(self, *fns, **kwargs): msg = kwargs.get("message", "failed user-defined condition") exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException for fn in fns: - - def pa(s, l, t): - if not bool(_trim_arity(fn)(s, l, t)): - raise exc_type(s, l, msg) - + def pa(s,l,t): + if not bool(_trim_arity(fn)(s,l,t)): + raise exc_type(s,l,msg) self.parseAction.append(pa) self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) return self - def setFailAction(self, fn): + def setFailAction( self, fn ): """Define action to perform if parsing fails at this expression. - Fail action fn is a callable function that takes the arguments - C{fn(s,loc,expr,err)} where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown - The function returns no value. It may throw C{L{ParseFatalException}} - if it is desired to stop parsing immediately.""" + Fail action fn is a callable function that takes the arguments + C{fn(s,loc,expr,err)} where: + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + The function returns no value. 
It may throw C{L{ParseFatalException}} + if it is desired to stop parsing immediately.""" self.failAction = fn return self - def _skipIgnorables(self, instring, loc): + def _skipIgnorables( self, instring, loc ): exprsFound = True while exprsFound: exprsFound = False for e in self.ignoreExprs: try: while 1: - loc, dummy = e._parse(instring, loc) + loc,dummy = e._parse( instring, loc ) exprsFound = True except ParseException: pass return loc - def preParse(self, instring, loc): + def preParse( self, instring, loc ): if self.ignoreExprs: - loc = self._skipIgnorables(instring, loc) + loc = self._skipIgnorables( instring, loc ) if self.skipWhitespace: wt = self.whiteChars @@ -1559,99 +1332,91 @@ def preParse(self, instring, loc): return loc - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): return loc, [] - def postParse(self, instring, loc, tokenlist): + def postParse( self, instring, loc, tokenlist ): return tokenlist - # ~ @profile - def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True): - debugging = self.debug # and doActions ) + #~ @profile + def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): + debugging = ( self.debug ) #and doActions ) if debugging or self.failAction: - # ~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if self.debugActions[0]: - self.debugActions[0](instring, loc, self) + #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) + if (self.debugActions[0] ): + self.debugActions[0]( instring, loc, self ) if callPreParse and self.callPreparse: - preloc = self.preParse(instring, loc) + preloc = self.preParse( instring, loc ) else: preloc = loc tokensStart = preloc try: try: - loc, tokens = self.parseImpl(instring, preloc, doActions) + loc,tokens = self.parseImpl( instring, preloc, doActions ) except IndexError: - raise ParseException(instring, len(instring), self.errmsg, self) + raise ParseException( instring, len(instring), self.errmsg, self ) except ParseBaseException as err: - # ~ print ("Exception raised:", err) + #~ print ("Exception raised:", err) if self.debugActions[2]: - self.debugActions[2](instring, tokensStart, self, err) + self.debugActions[2]( instring, tokensStart, self, err ) if self.failAction: - self.failAction(instring, tokensStart, self, err) + self.failAction( instring, tokensStart, self, err ) raise else: if callPreParse and self.callPreparse: - preloc = self.preParse(instring, loc) + preloc = self.preParse( instring, loc ) else: preloc = loc tokensStart = preloc if self.mayIndexError or loc >= len(instring): try: - loc, tokens = self.parseImpl(instring, preloc, doActions) + loc,tokens = self.parseImpl( instring, preloc, doActions ) except IndexError: - raise ParseException(instring, len(instring), self.errmsg, self) + raise ParseException( instring, len(instring), self.errmsg, self ) else: - loc, tokens = self.parseImpl(instring, preloc, doActions) + loc,tokens = self.parseImpl( instring, preloc, doActions ) - tokens = self.postParse(instring, loc, tokens) + tokens = self.postParse( instring, loc, tokens ) - retTokens = ParseResults( - tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults - ) + retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) if self.parseAction and (doActions or self.callDuringTry): if debugging: try: for fn in self.parseAction: - tokens = fn(instring, tokensStart, retTokens) + 
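C{setFailAction}, whose docstring above describes the C{fn(s,loc,expr,err)} signature but gives no example, can be sketched as follows (pyparsing 2.x API; the reporter function is hypothetical)::

    import pyparsing as pp

    def report_failure(s, loc, expr, err):
        # called when 'expr' fails at 'loc'; returning normally lets the
        # original ParseException propagate to the caller
        print("no %s at line %d, col %d" % (expr, pp.lineno(loc, s), pp.col(loc, s)))

    number = pp.Word(pp.nums).setFailAction(report_failure)
    try:
        number.parseString("abc")
    except pp.ParseException:
        pass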
tokens = fn( instring, tokensStart, retTokens ) if tokens is not None: - retTokens = ParseResults( - tokens, - self.resultsName, - asList=self.saveAsList - and isinstance(tokens, (ParseResults, list)), - modal=self.modalResults, - ) + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) except ParseBaseException as err: - # ~ print "Exception raised in user parse action:", err - if self.debugActions[2]: - self.debugActions[2](instring, tokensStart, self, err) + #~ print "Exception raised in user parse action:", err + if (self.debugActions[2] ): + self.debugActions[2]( instring, tokensStart, self, err ) raise else: for fn in self.parseAction: - tokens = fn(instring, tokensStart, retTokens) + tokens = fn( instring, tokensStart, retTokens ) if tokens is not None: - retTokens = ParseResults( - tokens, - self.resultsName, - asList=self.saveAsList - and isinstance(tokens, (ParseResults, list)), - modal=self.modalResults, - ) + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) if debugging: - # ~ print ("Matched",self,"->",retTokens.asList()) - if self.debugActions[1]: - self.debugActions[1](instring, tokensStart, loc, self, retTokens) + #~ print ("Matched",self,"->",retTokens.asList()) + if (self.debugActions[1] ): + self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) return loc, retTokens - def tryParse(self, instring, loc): + def tryParse( self, instring, loc ): try: - return self._parse(instring, loc, doActions=False)[0] + return self._parse( instring, loc, doActions=False )[0] except ParseFatalException: - raise ParseException(instring, loc, self.errmsg, self) - + raise ParseException( instring, loc, self.errmsg, self) + def canParseNext(self, instring, loc): try: self.tryParse(instring, loc) @@ -1673,7 +1438,7 @@ def set(self, key, value): def clear(self): cache.clear() - + def cache_len(self): return len(cache) @@ -1683,7 +1448,6 @@ def cache_len(self): self.__len__ = types.MethodType(cache_len, self) if _OrderedDict is not None: - class _FifoCache(object): def __init__(self, size): self.not_in_cache = not_in_cache = object() @@ -1713,7 +1477,6 @@ def cache_len(self): self.__len__ = types.MethodType(cache_len, self) else: - class _FifoCache(object): def __init__(self, size): self.not_in_cache = not_in_cache = object() @@ -1743,15 +1506,13 @@ def cache_len(self): self.__len__ = types.MethodType(cache_len, self) # argument cache for optimizing repeated calls when backtracking through recursive expressions - packrat_cache = ( - {} - ) # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail + packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail packrat_cache_lock = RLock() packrat_cache_stats = [0, 0] # this method gets repeatedly called during backtracking with the same arguments - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache(self, instring, loc, doActions=True, callPreParse=True): + def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): HIT, MISS = 0, 1 lookup = (self, instring, loc, callPreParse, doActions) with ParserElement.packrat_cache_lock: @@ -1779,38 +1540,35 @@ def _parseCache(self, instring, loc, doActions=True, callPreParse=True): @staticmethod def resetCache(): 
ParserElement.packrat_cache.clear() - ParserElement.packrat_cache_stats[:] = [0] * len( - ParserElement.packrat_cache_stats - ) + ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats) _packratEnabled = False - @staticmethod def enablePackrat(cache_size_limit=128): """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - Parameters: - - cache_size_limit - (default=C{128}) - if an integer value is provided - will limit the size of the packrat cache; if None is passed, then - the cache size will be unbounded; if 0 is passed, the cache will - be effectively disabled. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. To activate the packrat feature, your - program must call the class method C{ParserElement.enablePackrat()}. If - your program uses C{psyco} to "compile as you go", you must call - C{enablePackrat} before calling C{psyco.full()}. If you do not do this, - Python will crash. For best results, call C{enablePackrat()} immediately - after importing pyparsing. - - Example:: - import pyparsing - pyparsing.ParserElement.enablePackrat() + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + Parameters: + - cache_size_limit - (default=C{128}) - if an integer value is provided + will limit the size of the packrat cache; if None is passed, then + the cache size will be unbounded; if 0 is passed, the cache will + be effectively disabled. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method C{ParserElement.enablePackrat()}. If + your program uses C{psyco} to "compile as you go", you must call + C{enablePackrat} before calling C{psyco.full()}. If you do not do this, + Python will crash. For best results, call C{enablePackrat()} immediately + after importing pyparsing. + + Example:: + import pyparsing + pyparsing.ParserElement.enablePackrat() """ if not ParserElement._packratEnabled: ParserElement._packratEnabled = True @@ -1820,7 +1578,7 @@ def enablePackrat(cache_size_limit=128): ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) ParserElement._parse = ParserElement._parseCache - def parseString(self, instring, parseAll=False): + def parseString( self, instring, parseAll=False ): """ Execute the parse expression with the given string. 
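C{enablePackrat}, documented in the hunk above, is a one-time, process-wide switch; its effect is most visible on grammars that backtrack heavily. A hedged sketch (the expression grammar is made up for illustration; pyparsing 2.x API)::

    import pyparsing as pp

    # memoize repeated parse attempts at the same location; call once, right after import
    pp.ParserElement.enablePackrat()

    expr = pp.Forward()
    term = pp.Word(pp.nums) | pp.Group("(" + expr + ")")
    expr <<= term + pp.ZeroOrMore("+" + term)
    print(expr.parseString("(1+2)+3"))  # -> [['(', '1', '+', '2', ')'], '+', '3']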
This is the main interface to the client code, once the complete @@ -1842,7 +1600,7 @@ def parseString(self, instring, parseAll=False): reference the input string using the parse action's C{s} argument - explicitly expand the tabs in your input string before calling C{parseString} - + Example:: Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text @@ -1850,17 +1608,17 @@ def parseString(self, instring, parseAll=False): ParserElement.resetCache() if not self.streamlined: self.streamline() - # ~ self.saveAsList = True + #~ self.saveAsList = True for e in self.ignoreExprs: e.streamline() if not self.keepTabs: instring = instring.expandtabs() try: - loc, tokens = self._parse(instring, 0) + loc, tokens = self._parse( instring, 0 ) if parseAll: - loc = self.preParse(instring, loc) + loc = self.preParse( instring, loc ) se = Empty() + StringEnd() - se._parse(instring, loc) + se._parse( instring, loc ) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -1870,7 +1628,7 @@ def parseString(self, instring, parseAll=False): else: return tokens - def scanString(self, instring, maxMatches=_MAX_INT, overlap=False): + def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): """ Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location. May be called with optional @@ -1887,9 +1645,9 @@ def scanString(self, instring, maxMatches=_MAX_INT, overlap=False): for tokens,start,end in Word(alphas).scanString(source): print(' '*start + '^'*(end-start)) print(' '*start + tokens[0]) - + prints:: - + sldjf123lsdjjkf345sldkjf879lkjsfd987 ^^^^^ sldjf @@ -1916,16 +1674,16 @@ def scanString(self, instring, maxMatches=_MAX_INT, overlap=False): try: while loc <= instrlen and matches < maxMatches: try: - preloc = preparseFn(instring, loc) - nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) + preloc = preparseFn( instring, loc ) + nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) except ParseException: - loc = preloc + 1 + loc = preloc+1 else: if nextLoc > loc: matches += 1 yield tokens, preloc, nextLoc if overlap: - nextloc = preparseFn(instring, loc) + nextloc = preparseFn( instring, loc ) if nextloc > loc: loc = nextLoc else: @@ -1933,7 +1691,7 @@ def scanString(self, instring, maxMatches=_MAX_INT, overlap=False): else: loc = nextLoc else: - loc = preloc + 1 + loc = preloc+1 except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -1941,7 +1699,7 @@ def scanString(self, instring, maxMatches=_MAX_INT, overlap=False): # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc - def transformString(self, instring): + def transformString( self, instring ): """ Extension to C{L{scanString}}, to modify matching text with modified tokens that may be returned from a parse action. To use C{transformString}, define a grammar and @@ -1949,11 +1707,11 @@ def transformString(self, instring): Invoking C{transformString()} on a target string will then scan for matches, and replace the matched text patterns according to the logic in the parse action. C{transformString()} returns the resulting transformed string. 
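The C{parseString} notes above warn that tabs are expanded before parsing, which can make reported locations disagree with the original input; C{parseWithTabs} (restored further below) opts out of that expansion. A small sketch of the two modes (illustrative input)::

    import pyparsing as pp

    text = "key\tvalue"
    kv = pp.Word(pp.alphas) + pp.Word(pp.alphas)
    print(kv.parseString(text))                         # tab expanded to spaces before matching
    print(kv.copy().parseWithTabs().parseString(text))  # raw string offsets are kept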
- + Example:: wd = Word(alphas) wd.setParseAction(lambda toks: toks[0].title()) - + print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york.")) Prints:: Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. @@ -1964,19 +1722,19 @@ def transformString(self, instring): # keep string locs straight between transformString and scanString self.keepTabs = True try: - for t, s, e in self.scanString(instring): - out.append(instring[lastE:s]) + for t,s,e in self.scanString( instring ): + out.append( instring[lastE:s] ) if t: - if isinstance(t, ParseResults): + if isinstance(t,ParseResults): out += t.asList() - elif isinstance(t, list): + elif isinstance(t,list): out += t else: out.append(t) lastE = e out.append(instring[lastE:]) out = [o for o in out if o] - return "".join(map(_ustr, _flatten(out))) + return "".join(map(_ustr,_flatten(out))) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -1984,16 +1742,16 @@ def transformString(self, instring): # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc - def searchString(self, instring, maxMatches=_MAX_INT): + def searchString( self, instring, maxMatches=_MAX_INT ): """ Another extension to C{L{scanString}}, simplifying the access to the tokens found to match the given parse expression. May be called with optional C{maxMatches} argument, to clip searching after 'n' matches are found. - + Example:: # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters cap_word = Word(alphas.upper(), alphas.lower()) - + print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) # the sum() builtin can be used to merge results into a single ParseResults object @@ -2003,9 +1761,7 @@ def searchString(self, instring, maxMatches=_MAX_INT): ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] """ try: - return ParseResults( - [t for t, s, e in self.scanString(instring, maxMatches)] - ) + return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: raise @@ -2019,8 +1775,8 @@ def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): May be called with optional C{maxsplit} argument, to limit the number of splits; and the optional C{includeSeparators} argument (default=C{False}), if the separating matching text should be included in the split results. - - Example:: + + Example:: punc = oneOf(list(".,;:/-!?")) print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) prints:: @@ -2028,18 +1784,18 @@ def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): """ splits = 0 last = 0 - for t, s, e in self.scanString(instring, maxMatches=maxsplit): + for t,s,e in self.scanString(instring, maxMatches=maxsplit): yield instring[last:s] if includeSeparators: yield t[0] last = e yield instring[last:] - def __add__(self, other): + def __add__(self, other ): """ Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement converts them to L{Literal}s by default. - + Example:: greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" @@ -2047,29 +1803,23 @@ def __add__(self, other): Prints:: Hello, World! 
-> ['Hello', ',', 'World', '!'] """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None - return And([self, other]) + return And( [ self, other ] ) - def __radd__(self, other): + def __radd__(self, other ): """ Implementation of + operator when left operand is not a C{L{ParserElement}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None return other + self @@ -2077,33 +1827,27 @@ def __sub__(self, other): """ Implementation of - operator, returns C{L{And}} with error stop """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None return self + And._ErrorStop() + other - def __rsub__(self, other): + def __rsub__(self, other ): """ Implementation of - operator when left operand is not a C{L{ParserElement}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None return other - self - def __mul__(self, other): + def __mul__(self,other): """ Implementation of * operator, allows use of C{expr * 3} in place of C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer @@ -2123,190 +1867,162 @@ def __mul__(self, other): occurrences. 
If this behavior is desired, then write C{expr*(None,n) + ~expr} """ - if isinstance(other, int): - minElements, optElements = other, 0 - elif isinstance(other, tuple): + if isinstance(other,int): + minElements, optElements = other,0 + elif isinstance(other,tuple): other = (other + (None, None))[:2] if other[0] is None: other = (0, other[1]) - if isinstance(other[0], int) and other[1] is None: + if isinstance(other[0],int) and other[1] is None: if other[0] == 0: return ZeroOrMore(self) if other[0] == 1: return OneOrMore(self) else: - return self * other[0] + ZeroOrMore(self) - elif isinstance(other[0], int) and isinstance(other[1], int): + return self*other[0] + ZeroOrMore(self) + elif isinstance(other[0],int) and isinstance(other[1],int): minElements, optElements = other optElements -= minElements else: - raise TypeError( - "cannot multiply 'ParserElement' and ('%s','%s') objects", - type(other[0]), - type(other[1]), - ) + raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) else: - raise TypeError( - "cannot multiply 'ParserElement' and '%s' objects", type(other) - ) + raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) if minElements < 0: raise ValueError("cannot multiply ParserElement by negative value") if optElements < 0: - raise ValueError( - "second tuple value must be greater or equal to first tuple value" - ) + raise ValueError("second tuple value must be greater or equal to first tuple value") if minElements == optElements == 0: raise ValueError("cannot multiply ParserElement by 0 or (0,0)") - if optElements: - + if (optElements): def makeOptionalList(n): - if n > 1: - return Optional(self + makeOptionalList(n - 1)) + if n>1: + return Optional(self + makeOptionalList(n-1)) else: return Optional(self) - if minElements: if minElements == 1: ret = self + makeOptionalList(optElements) else: - ret = And([self] * minElements) + makeOptionalList(optElements) + ret = And([self]*minElements) + makeOptionalList(optElements) else: ret = makeOptionalList(optElements) else: if minElements == 1: ret = self else: - ret = And([self] * minElements) + ret = And([self]*minElements) return ret def __rmul__(self, other): return self.__mul__(other) - def __or__(self, other): + def __or__(self, other ): """ Implementation of | operator - returns C{L{MatchFirst}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None - return MatchFirst([self, other]) + return MatchFirst( [ self, other ] ) - def __ror__(self, other): + def __ror__(self, other ): """ Implementation of | operator when left operand is not a C{L{ParserElement}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type 
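The C{__mul__} overload restored above maps an integer or a 2-tuple onto repetition of the expression; concretely (illustrative, pyparsing 2.x)::

    import pyparsing as pp

    digit = pp.Word(pp.nums, exact=1)
    print((digit * 3).parseString("1 2 3"))     # same as digit + digit + digit -> ['1', '2', '3']
    print((digit * (2, 4)).parseString("5 6"))  # between 2 and 4 occurrences  -> ['5', '6']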
%s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None return other | self - def __xor__(self, other): + def __xor__(self, other ): """ Implementation of ^ operator - returns C{L{Or}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None - return Or([self, other]) + return Or( [ self, other ] ) - def __rxor__(self, other): + def __rxor__(self, other ): """ Implementation of ^ operator when left operand is not a C{L{ParserElement}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None return other ^ self - def __and__(self, other): + def __and__(self, other ): """ Implementation of & operator - returns C{L{Each}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None - return Each([self, other]) + return Each( [ self, other ] ) - def __rand__(self, other): + def __rand__(self, other ): """ Implementation of & operator when left operand is not a C{L{ParserElement}} """ - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - if not isinstance(other, ParserElement): - warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2, - ) + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) return None return other & self - def __invert__(self): + def __invert__( self ): """ Implementation of ~ operator - returns C{L{NotAny}} """ - return NotAny(self) + return NotAny( self ) def __call__(self, name=None): """ Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}. - + If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be passed as C{True}. - + If C{name} is omitted, same as calling C{L{copy}}. 
Example:: # these are equivalent userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") """ if name is not None: return self.setResultsName(name) else: return self.copy() - def suppress(self): + def suppress( self ): """ Suppresses the output of this C{ParserElement}; useful to keep punctuation from cluttering up returned output. """ - return Suppress(self) + return Suppress( self ) - def leaveWhitespace(self): + def leaveWhitespace( self ): """ Disables the skipping of whitespace before matching the characters in the C{ParserElement}'s defined pattern. This is normally only used internally by @@ -2315,7 +2031,7 @@ def leaveWhitespace(self): self.skipWhitespace = False return self - def setWhitespaceChars(self, chars): + def setWhitespaceChars( self, chars ): """ Overrides the default whitespace chars """ @@ -2324,7 +2040,7 @@ def setWhitespaceChars(self, chars): self.copyDefaultWhiteChars = False return self - def parseWithTabs(self): + def parseWithTabs( self ): """ Overrides default behavior to expand C{}s to spaces before parsing the input string. Must be called before C{parseString} when the input grammar contains elements that @@ -2333,42 +2049,40 @@ def parseWithTabs(self): self.keepTabs = True return self - def ignore(self, other): + def ignore( self, other ): """ Define expression to be ignored (e.g., comments) while doing pattern matching; may be called repeatedly, to define multiple comment or other ignorable patterns. - + Example:: patt = OneOrMore(Word(alphas)) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] - + patt.ignore(cStyleComment) patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] """ if isinstance(other, basestring): other = Suppress(other) - if isinstance(other, Suppress): + if isinstance( other, Suppress ): if other not in self.ignoreExprs: self.ignoreExprs.append(other) else: - self.ignoreExprs.append(Suppress(other.copy())) + self.ignoreExprs.append( Suppress( other.copy() ) ) return self - def setDebugActions(self, startAction, successAction, exceptionAction): + def setDebugActions( self, startAction, successAction, exceptionAction ): """ Enable display of debugging messages while doing pattern matching. """ - self.debugActions = ( - startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, - exceptionAction or _defaultExceptionDebugAction, - ) + self.debugActions = (startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, + exceptionAction or _defaultExceptionDebugAction) self.debug = True return self - def setDebug(self, flag=True): + def setDebug( self, flag=True ): """ Enable display of debugging messages while doing pattern matching. Set C{flag} to True to enable, False to disable. @@ -2377,12 +2091,12 @@ def setDebug(self, flag=True): wd = Word(alphas).setName("alphaword") integer = Word(nums).setName("numword") term = wd | integer - + # turn on debugging for wd wd.setDebug() OneOrMore(term).parseString("abc 123 xyz 890") - + prints:: Match alphaword at loc 0(1,1) Matched alphaword -> ['abc'] @@ -2404,36 +2118,32 @@ def setDebug(self, flag=True): name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}. 
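C{setDebugActions}, restored above without an example of its own, takes three hooks (match start, match success, match failure); passing C{None} for a slot falls back to the corresponding default action. A minimal sketch (the hook name is hypothetical; pyparsing 2.x API)::

    import pyparsing as pp

    def on_try(instring, loc, expr):
        print("trying %s at loc %d" % (expr, loc))

    word = pp.Word(pp.alphas).setName("word")
    word.setDebugActions(on_try, None, None)  # None -> default success/exception output
    word.parseString("hello")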
""" if flag: - self.setDebugActions( - _defaultStartDebugAction, - _defaultSuccessDebugAction, - _defaultExceptionDebugAction, - ) + self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) else: self.debug = False return self - def __str__(self): + def __str__( self ): return self.name - def __repr__(self): + def __repr__( self ): return _ustr(self) - def streamline(self): + def streamline( self ): self.streamlined = True self.strRepr = None return self - def checkRecursion(self, parseElementList): + def checkRecursion( self, parseElementList ): pass - def validate(self, validateTrace=[]): + def validate( self, validateTrace=[] ): """ Check defined expressions for valid structure, check for infinite recursive definitions. """ - self.checkRecursion([]) + self.checkRecursion( [] ) - def parseFile(self, file_or_filename, parseAll=False): + def parseFile( self, file_or_filename, parseAll=False ): """ Execute the parse expression on the given file or filename. If a filename is specified (instead of a file object), @@ -2453,35 +2163,35 @@ def parseFile(self, file_or_filename, parseAll=False): # catch and re-raise exception from here, clears out pyparsing internal stack trace raise exc - def __eq__(self, other): + def __eq__(self,other): if isinstance(other, ParserElement): return self is other or vars(self) == vars(other) elif isinstance(other, basestring): return self.matches(other) else: - return super(ParserElement, self) == other + return super(ParserElement,self)==other - def __ne__(self, other): + def __ne__(self,other): return not (self == other) def __hash__(self): return hash(id(self)) - def __req__(self, other): + def __req__(self,other): return self == other - def __rne__(self, other): + def __rne__(self,other): return not (self == other) def matches(self, testString, parseAll=True): """ - Method for quick testing of a parser against a test string. Good for simple + Method for quick testing of a parser against a test string. Good for simple inline microtests of sub expressions while building up larger parser. - + Parameters: - testString - to test against this expression for a match - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - + Example:: expr = Word(nums) assert expr.matches("100") @@ -2491,25 +2201,17 @@ def matches(self, testString, parseAll=True): return True except ParseBaseException: return False - - def runTests( - self, - tests, - parseAll=True, - comment="#", - fullDump=True, - printResults=True, - failureTests=False, - ): + + def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False): """ Execute the parse expression on a series of test strings, showing each test, the parsed results or where the parse failed. Quick and easy way to run a parse expression against a list of sample strings. 
- + Parameters: - tests - a list of separate test strings, or a multiline string of test strings - - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests - - comment - (default=C{'#'}) - expression for indicating embedded comments in the test + - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests + - comment - (default=C{'#'}) - expression for indicating embedded comments in the test string; pass None to disable comment filtering - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline; if False, only dump nested list @@ -2517,9 +2219,9 @@ def runTests( - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing Returns: a (success, results) tuple, where success indicates that all tests succeeded - (or failed if C{failureTests} is True), and the results contain a list of lines of each + (or failed if C{failureTests} is True), and the results contain a list of lines of each test's output - + Example:: number_expr = pyparsing_common.number.copy() @@ -2562,7 +2264,7 @@ def runTests( [1e-12] Success - + # stray character 100Z ^ @@ -2584,7 +2286,7 @@ def runTests( lines, create a test like this:: expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") - + (Note that this is a raw string literal, you must include the leading 'r'.) """ if isinstance(tests, basestring): @@ -2600,20 +2302,20 @@ def runTests( continue if not t: continue - out = ["\n".join(comments), t] + out = ['\n'.join(comments), t] comments = [] try: - t = t.replace(r"\n", "\n") + t = t.replace(r'\n','\n') result = self.parseString(t, parseAll=parseAll) out.append(result.dump(full=fullDump)) success = success and not failureTests except ParseBaseException as pe: fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" - if "\n" in t: + if '\n' in t: out.append(line(pe.loc, t)) - out.append(" " * (col(pe.loc, t) - 1) + "^" + fatal) + out.append(' '*(col(pe.loc,t)-1) + '^' + fatal) else: - out.append(" " * pe.loc + "^" + fatal) + out.append(' '*pe.loc + '^' + fatal) out.append("FAIL: " + str(pe)) success = success and failureTests result = pe @@ -2624,30 +2326,28 @@ def runTests( if printResults: if fullDump: - out.append("") - print("\n".join(out)) + out.append('') + print('\n'.join(out)) allResults.append((t, result)) - + return success, allResults - + class Token(ParserElement): """ Abstract C{ParserElement} subclass, for defining atomic matching patterns. """ - - def __init__(self): - super(Token, self).__init__(savelist=False) + def __init__( self ): + super(Token,self).__init__( savelist=False ) class Empty(Token): """ An empty token, will always match. """ - - def __init__(self): - super(Empty, self).__init__() + def __init__( self ): + super(Empty,self).__init__() self.name = "Empty" self.mayReturnEmpty = True self.mayIndexError = False @@ -2657,45 +2357,40 @@ class NoMatch(Token): """ A token that will never match. """ - - def __init__(self): - super(NoMatch, self).__init__() + def __init__( self ): + super(NoMatch,self).__init__() self.name = "NoMatch" self.mayReturnEmpty = True self.mayIndexError = False self.errmsg = "Unmatchable token" - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): raise ParseException(instring, loc, self.errmsg, self) class Literal(Token): """ Token to exactly match a specified string. 
- + Example:: Literal('blah').parseString('blah') # -> ['blah'] Literal('blah').parseString('blahfooblah') # -> ['blah'] Literal('blah').parseString('bla') # -> Exception: Expected "blah" - + For case-insensitive matching, use L{CaselessLiteral}. - + For keyword matching (force word break before and after the matched string), use L{Keyword} or L{CaselessKeyword}. """ - - def __init__(self, matchString): - super(Literal, self).__init__() + def __init__( self, matchString ): + super(Literal,self).__init__() self.match = matchString self.matchLen = len(matchString) try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn( - "null string passed to Literal; use Empty() instead", - SyntaxWarning, - stacklevel=2, - ) + warnings.warn("null string passed to Literal; use Empty() instead", + SyntaxWarning, stacklevel=2) self.__class__ = Empty self.name = '"%s"' % _ustr(self.match) self.errmsg = "Expected " + self.name @@ -2705,19 +2400,15 @@ def __init__(self, matchString): # Performance tuning: this routine gets called a *lot* # if this is a single character match string and the first character matches, # short-circuit as quickly as possible, and avoid calling startswith - # ~ @profile - def parseImpl(self, instring, loc, doActions=True): - if instring[loc] == self.firstMatchChar and ( - self.matchLen == 1 or instring.startswith(self.match, loc) - ): - return loc + self.matchLen, self.match + #~ @profile + def parseImpl( self, instring, loc, doActions=True ): + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) ): + return loc+self.matchLen, self.match raise ParseException(instring, loc, self.errmsg, self) - - _L = Literal ParserElement._literalStringClass = Literal - class Keyword(Token): """ Token to exactly match a specified string as a keyword, that is, it must be @@ -2728,18 +2419,17 @@ class Keyword(Token): - C{identChars} is a string of characters that would be valid identifier characters, defaulting to all alphanumerics + "_" and "$" - C{caseless} allows case-insensitive matching, default is C{False}. - + Example:: Keyword("start").parseString("start") # -> ['start'] Keyword("start").parseString("starting") # -> Exception For case-insensitive matching, use L{CaselessKeyword}. 
""" + DEFAULT_KEYWORD_CHARS = alphanums+"_$" - DEFAULT_KEYWORD_CHARS = alphanums + "_$" - - def __init__(self, matchString, identChars=None, caseless=False): - super(Keyword, self).__init__() + def __init__( self, matchString, identChars=None, caseless=False ): + super(Keyword,self).__init__() if identChars is None: identChars = Keyword.DEFAULT_KEYWORD_CHARS self.match = matchString @@ -2747,11 +2437,8 @@ def __init__(self, matchString, identChars=None, caseless=False): try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn( - "null string passed to Keyword; use Empty() instead", - SyntaxWarning, - stacklevel=2, - ) + warnings.warn("null string passed to Keyword; use Empty() instead", + SyntaxWarning, stacklevel=2) self.name = '"%s"' % self.match self.errmsg = "Expected " + self.name self.mayReturnEmpty = False @@ -2762,41 +2449,31 @@ def __init__(self, matchString, identChars=None, caseless=False): identChars = identChars.upper() self.identChars = set(identChars) - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): if self.caseless: - if ( - (instring[loc : loc + self.matchLen].upper() == self.caselessmatch) - and ( - loc >= len(instring) - self.matchLen - or instring[loc + self.matchLen].upper() not in self.identChars - ) - and (loc == 0 or instring[loc - 1].upper() not in self.identChars) - ): - return loc + self.matchLen, self.match + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and + (loc == 0 or instring[loc-1].upper() not in self.identChars) ): + return loc+self.matchLen, self.match else: - if ( - instring[loc] == self.firstMatchChar - and (self.matchLen == 1 or instring.startswith(self.match, loc)) - and ( - loc >= len(instring) - self.matchLen - or instring[loc + self.matchLen] not in self.identChars - ) - and (loc == 0 or instring[loc - 1] not in self.identChars) - ): - return loc + self.matchLen, self.match + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and + (loc == 0 or instring[loc-1] not in self.identChars) ): + return loc+self.matchLen, self.match raise ParseException(instring, loc, self.errmsg, self) def copy(self): - c = super(Keyword, self).copy() + c = super(Keyword,self).copy() c.identChars = Keyword.DEFAULT_KEYWORD_CHARS return c @staticmethod - def setDefaultKeywordChars(chars): - """Overrides the default Keyword chars""" + def setDefaultKeywordChars( chars ): + """Overrides the default Keyword chars + """ Keyword.DEFAULT_KEYWORD_CHARS = chars - class CaselessLiteral(Literal): """ Token to match a specified string, ignoring case of letters. @@ -2805,58 +2482,52 @@ class CaselessLiteral(Literal): Example:: OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] - + (Contrast with example for L{CaselessKeyword}.) """ - - def __init__(self, matchString): - super(CaselessLiteral, self).__init__(matchString.upper()) + def __init__( self, matchString ): + super(CaselessLiteral,self).__init__( matchString.upper() ) # Preserve the defining literal. 
self.returnString = matchString self.name = "'%s'" % self.returnString self.errmsg = "Expected " + self.name - def parseImpl(self, instring, loc, doActions=True): - if instring[loc : loc + self.matchLen].upper() == self.match: - return loc + self.matchLen, self.returnString + def parseImpl( self, instring, loc, doActions=True ): + if instring[ loc:loc+self.matchLen ].upper() == self.match: + return loc+self.matchLen, self.returnString raise ParseException(instring, loc, self.errmsg, self) - class CaselessKeyword(Keyword): """ Caseless version of L{Keyword}. Example:: OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] - + (Contrast with example for L{CaselessLiteral}.) """ + def __init__( self, matchString, identChars=None ): + super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) - def __init__(self, matchString, identChars=None): - super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True) - - def parseImpl(self, instring, loc, doActions=True): - if (instring[loc : loc + self.matchLen].upper() == self.caselessmatch) and ( - loc >= len(instring) - self.matchLen - or instring[loc + self.matchLen].upper() not in self.identChars - ): - return loc + self.matchLen, self.match + def parseImpl( self, instring, loc, doActions=True ): + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): + return loc+self.matchLen, self.match raise ParseException(instring, loc, self.errmsg, self) - class CloseMatch(Token): """ - A variation on L{Literal} which matches "close" matches, that is, + A variation on L{Literal} which matches "close" matches, that is, strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters: - C{match_string} - string to be matched - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match - + The results from a successful parse will contain the matched text from the input string and the following named results: - C{mismatches} - a list of the positions within the match_string where mismatches were found - C{original} - the original match_string used to compare against the input string - + If C{mismatches} is an empty list, then the match was an exact match. 
- + Example:: patt = CloseMatch("ATCATCGAATGGA") patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) @@ -2869,20 +2540,16 @@ class CloseMatch(Token): patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2) patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) """ - def __init__(self, match_string, maxMismatches=1): - super(CloseMatch, self).__init__() + super(CloseMatch,self).__init__() self.name = match_string self.match_string = match_string self.maxMismatches = maxMismatches - self.errmsg = "Expected %r (with up to %d mismatches)" % ( - self.match_string, - self.maxMismatches, - ) + self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches) self.mayIndexError = False self.mayReturnEmpty = False - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): start = loc instrlen = len(instring) maxloc = start + len(self.match_string) @@ -2893,10 +2560,8 @@ def parseImpl(self, instring, loc, doActions=True): mismatches = [] maxMismatches = self.maxMismatches - for match_stringloc, s_m in enumerate( - zip(instring[loc:maxloc], self.match_string) - ): - src, mat = s_m + for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)): + src,mat = s_m if src != mat: mismatches.append(match_stringloc) if len(mismatches) > maxMismatches: @@ -2904,8 +2569,8 @@ def parseImpl(self, instring, loc, doActions=True): else: loc = match_stringloc + 1 results = ParseResults([instring[start:loc]]) - results["original"] = self.match_string - results["mismatches"] = mismatches + results['original'] = self.match_string + results['mismatches'] = mismatches return loc, results raise ParseException(instring, loc, self.errmsg, self) @@ -2920,14 +2585,14 @@ class Word(Token): maximum, and/or exact length. The default value for C{min} is 1 (a minimum value < 1 is not valid); the default values for C{max} and C{exact} are 0, meaning no maximum or exact length restriction. An optional - C{excludeChars} parameter can list characters that might be found in + C{excludeChars} parameter can list characters that might be found in the input C{bodyChars} string; useful to define a word of all printables except for one or two characters, for instance. - - L{srange} is useful for defining custom character set strings for defining + + L{srange} is useful for defining custom character set strings for defining C{Word} expressions, using range notation from regular expression character sets. - - A common mistake is to use C{Word} to match a specific literal string, as in + + A common mistake is to use C{Word} to match a specific literal string, as in C{Word("Address")}. Remember that C{Word} uses the string argument to define I{sets} of matchable characters. This expression would match "Add", "AAA", "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. 
@@ -2945,38 +2610,28 @@ class Word(Token): Example:: # a word composed of digits integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) - + # a word with a leading capital, and zero or more lowercase capital_word = Word(alphas.upper(), alphas.lower()) # hostnames are alphanumeric, with leading alpha, and '-' hostname = Word(alphas, alphanums+'-') - + # roman numeral (not a strict parser, accepts invalid mix of characters) roman = Word("IVXLCDM") - + # any string of non-whitespace characters, except for ',' csv_value = Word(printables, excludeChars=",") """ - - def __init__( - self, - initChars, - bodyChars=None, - min=1, - max=0, - exact=0, - asKeyword=False, - excludeChars=None, - ): - super(Word, self).__init__() + def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): + super(Word,self).__init__() if excludeChars: - initChars = "".join(c for c in initChars if c not in excludeChars) + initChars = ''.join(c for c in initChars if c not in excludeChars) if bodyChars: - bodyChars = "".join(c for c in bodyChars if c not in excludeChars) + bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) self.initCharsOrig = initChars self.initChars = set(initChars) - if bodyChars: + if bodyChars : self.bodyCharsOrig = bodyChars self.bodyChars = set(bodyChars) else: @@ -2986,10 +2641,7 @@ def __init__( self.maxSpecified = max > 0 if min < 1: - raise ValueError( - "cannot specify a minimum length < 1; use Optional(Word()) if" - " zero-length word is permitted" - ) + raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") self.minLen = min @@ -3007,38 +2659,34 @@ def __init__( self.mayIndexError = False self.asKeyword = asKeyword - if " " not in self.initCharsOrig + self.bodyCharsOrig and ( - min == 1 and max == 0 and exact == 0 - ): + if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): if self.bodyCharsOrig == self.initCharsOrig: self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) elif len(self.initCharsOrig) == 1: - self.reString = "%s[%s]*" % ( - re.escape(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig), - ) + self.reString = "%s[%s]*" % \ + (re.escape(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) else: - self.reString = "[%s][%s]*" % ( - _escapeRegexRangeChars(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig), - ) + self.reString = "[%s][%s]*" % \ + (_escapeRegexRangeChars(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) if self.asKeyword: - self.reString = r"\b" + self.reString + r"\b" + self.reString = r"\b"+self.reString+r"\b" try: - self.re = re.compile(self.reString) + self.re = re.compile( self.reString ) except Exception: self.re = None - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): if self.re: - result = self.re.match(instring, loc) + result = self.re.match(instring,loc) if not result: raise ParseException(instring, loc, self.errmsg, self) loc = result.end() return loc, result.group() - if not (instring[loc] in self.initChars): + if not(instring[ loc ] in self.initChars): raise ParseException(instring, loc, self.errmsg, self) start = loc @@ -3046,7 +2694,7 @@ def parseImpl(self, instring, loc, doActions=True): instrlen = len(instring) bodychars = self.bodyChars maxloc = start + self.maxLen - maxloc = min(maxloc, instrlen) + maxloc = min( maxloc, instrlen ) while 
loc < maxloc and instring[loc] in bodychars:
             loc += 1

@@ -3056,9 +2704,7 @@ def parseImpl(self, instring, loc, doActions=True):
         if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
             throwException = True
         if self.asKeyword:
-            if (start > 0 and instring[start - 1] in bodychars) or (
-                loc < instrlen and instring[loc] in bodychars
-            ):
+            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                 throwException = True

         if throwException:
             raise ParseException(instring, loc, self.errmsg, self)

         return loc, instring[start:loc]

-    def __str__(self):
+    def __str__( self ):
         try:
-            return super(Word, self).__str__()
+            return super(Word,self).__str__()
         except Exception:
             pass

         if self.strRepr is None:

             def charsAsStr(s):
-                if len(s) > 4:
-                    return s[:4] + "..."
+                if len(s)>4:
+                    return s[:4]+"..."
                 else:
                     return s

-            if self.initCharsOrig != self.bodyCharsOrig:
-                self.strRepr = "W:(%s,%s)" % (
-                    charsAsStr(self.initCharsOrig),
-                    charsAsStr(self.bodyCharsOrig),
-                )
+            if ( self.initCharsOrig != self.bodyCharsOrig ):
+                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
             else:
                 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

@@ -3095,7 +2739,7 @@ class Regex(Token):
     r"""
     Token for matching strings that match a given regular expression.
     Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
-    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
+    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as 
     named parse results.

     Example::
@@ -3105,18 +2749,14 @@ class Regex(Token):
         roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
     """
     compiledREtype = type(re.compile("[A-Z]"))
-
-    def __init__(self, pattern, flags=0):
+    def __init__( self, pattern, flags=0):
         """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
-        super(Regex, self).__init__()
+        super(Regex,self).__init__()

         if isinstance(pattern, basestring):
             if not pattern:
-                warnings.warn(
-                    "null string passed to Regex; use Empty() instead",
-                    SyntaxWarning,
-                    stacklevel=2,
-                )
+                warnings.warn("null string passed to Regex; use Empty() instead",
+                        SyntaxWarning, stacklevel=2)

             self.pattern = pattern
             self.flags = flags

@@ -3125,30 +2765,26 @@ def __init__(self, pattern, flags=0):
                 self.re = re.compile(self.pattern, self.flags)
                 self.reString = self.pattern
             except sre_constants.error:
-                warnings.warn(
-                    "invalid pattern (%s) passed to Regex" % pattern,
-                    SyntaxWarning,
-                    stacklevel=2,
-                )
+                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+                    SyntaxWarning, stacklevel=2)
                 raise

         elif isinstance(pattern, Regex.compiledREtype):
             self.re = pattern
-            self.pattern = self.reString = str(pattern)
+            self.pattern = \
+                self.reString = str(pattern)
             self.flags = flags
-
+            
         else:
-            raise ValueError(
-                "Regex may only be constructed with a string or a compiled RE object"
-            )
+            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

         self.name = _ustr(self)
         self.errmsg = "Expected " + self.name
         self.mayIndexError = False
         self.mayReturnEmpty = True

-    def parseImpl(self, instring, loc, doActions=True):
-        result = self.re.match(instring, loc)
+    def parseImpl( self, instring, loc, doActions=True ):
+        result = self.re.match(instring,loc)
         if not result:
             raise ParseException(instring, loc, self.errmsg, self)

@@ -3158,11 +2794,11 @@ def parseImpl(self, instring, loc, doActions=True):
         if d:
             for k in d:
                 ret[k] = d[k]
-        return loc, ret
+        return loc,ret

-    def __str__(self):
+    def __str__( self ):
         try:
-            return super(Regex, self).__str__()
+            return super(Regex,self).__str__()
         except Exception:
             pass

@@ -3175,7 +2811,7 @@ def __str__(self):

 class
QuotedString(Token): r""" Token for matching strings that are delimited by quoting characters. - + Defined with the following parameters: - quoteChar - string of one or more characters defining the quote delimiting string - escChar - character to escape quotes, typically backslash (default=C{None}) @@ -3197,25 +2833,13 @@ class QuotedString(Token): [['This is the "quote"']] [['This is the quote with "embedded" quotes']] """ - - def __init__( - self, - quoteChar, - escChar=None, - escQuote=None, - multiline=False, - unquoteResults=True, - endQuoteChar=None, - convertWhitespaceEscapes=True, - ): - super(QuotedString, self).__init__() + def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): + super(QuotedString,self).__init__() # remove white space from quote chars - won't work anyway quoteChar = quoteChar.strip() if not quoteChar: - warnings.warn( - "quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2 - ) + warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) raise SyntaxError() if endQuoteChar is None: @@ -3223,11 +2847,7 @@ def __init__( else: endQuoteChar = endQuoteChar.strip() if not endQuoteChar: - warnings.warn( - "endQuoteChar cannot be the empty string", - SyntaxWarning, - stacklevel=2, - ) + warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) raise SyntaxError() self.quoteChar = quoteChar @@ -3242,47 +2862,35 @@ def __init__( if multiline: self.flags = re.MULTILINE | re.DOTALL - self.pattern = r"%s(?:[^%s%s]" % ( - re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or ""), - ) + self.pattern = r'%s(?:[^%s%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) else: self.flags = 0 - self.pattern = r"%s(?:[^%s\n\r%s]" % ( - re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or ""), - ) + self.pattern = r'%s(?:[^%s\n\r%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) if len(self.endQuoteChar) > 1: self.pattern += ( - "|(?:" - + ")|(?:".join( - "%s[^%s]" - % ( - re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i]), - ) - for i in range(len(self.endQuoteChar) - 1, 0, -1) + '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), + _escapeRegexRangeChars(self.endQuoteChar[i])) + for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' ) - + ")" - ) if escQuote: - self.pattern += r"|(?:%s)" % re.escape(escQuote) + self.pattern += (r'|(?:%s)' % re.escape(escQuote)) if escChar: - self.pattern += r"|(?:%s.)" % re.escape(escChar) - self.escCharReplacePattern = re.escape(self.escChar) + "(.)" - self.pattern += r")*%s" % re.escape(self.endQuoteChar) + self.pattern += (r'|(?:%s.)' % re.escape(escChar)) + self.escCharReplacePattern = re.escape(self.escChar)+"(.)" + self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) try: self.re = re.compile(self.pattern, self.flags) self.reString = self.pattern except sre_constants.error: - warnings.warn( - "invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, - stacklevel=2, - ) + warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, + SyntaxWarning, 
stacklevel=2) raise self.name = _ustr(self) @@ -3290,12 +2898,8 @@ def __init__( self.mayIndexError = False self.mayReturnEmpty = True - def parseImpl(self, instring, loc, doActions=True): - result = ( - instring[loc] == self.firstQuoteChar - and self.re.match(instring, loc) - or None - ) + def parseImpl( self, instring, loc, doActions=True ): + result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None if not result: raise ParseException(instring, loc, self.errmsg, self) @@ -3305,18 +2909,18 @@ def parseImpl(self, instring, loc, doActions=True): if self.unquoteResults: # strip off quotes - ret = ret[self.quoteCharLen : -self.endQuoteCharLen] + ret = ret[self.quoteCharLen:-self.endQuoteCharLen] - if isinstance(ret, basestring): + if isinstance(ret,basestring): # replace escaped whitespace - if "\\" in ret and self.convertWhitespaceEscapes: + if '\\' in ret and self.convertWhitespaceEscapes: ws_map = { - r"\t": "\t", - r"\n": "\n", - r"\f": "\f", - r"\r": "\r", + r'\t' : '\t', + r'\n' : '\n', + r'\f' : '\f', + r'\r' : '\r', } - for wslit, wschar in ws_map.items(): + for wslit,wschar in ws_map.items(): ret = ret.replace(wslit, wschar) # replace escaped characters @@ -3329,17 +2933,14 @@ def parseImpl(self, instring, loc, doActions=True): return loc, ret - def __str__(self): + def __str__( self ): try: - return super(QuotedString, self).__str__() + return super(QuotedString,self).__str__() except Exception: pass if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % ( - self.quoteChar, - self.endQuoteChar, - ) + self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) return self.strRepr @@ -3360,17 +2961,13 @@ class CharsNotIn(Token): prints:: ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] """ - - def __init__(self, notChars, min=1, max=0, exact=0): - super(CharsNotIn, self).__init__() + def __init__( self, notChars, min=1, max=0, exact=0 ): + super(CharsNotIn,self).__init__() self.skipWhitespace = False self.notChars = notChars if min < 1: - raise ValueError( - "cannot specify a minimum length < 1; use Optional(CharsNotIn()) if" - " zero-length char group is permitted" - ) + raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") self.minLen = min @@ -3385,18 +2982,19 @@ def __init__(self, notChars, min=1, max=0, exact=0): self.name = _ustr(self) self.errmsg = "Expected " + self.name - self.mayReturnEmpty = self.minLen == 0 + self.mayReturnEmpty = ( self.minLen == 0 ) self.mayIndexError = False - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): if instring[loc] in self.notChars: raise ParseException(instring, loc, self.errmsg, self) start = loc loc += 1 notchars = self.notChars - maxlen = min(start + self.maxLen, len(instring)) - while loc < maxlen and (instring[loc] not in notchars): + maxlen = min( start+self.maxLen, len(instring) ) + while loc < maxlen and \ + (instring[loc] not in notchars): loc += 1 if loc - start < self.minLen: @@ -3404,7 +3002,7 @@ def parseImpl(self, instring, loc, doActions=True): return loc, instring[start:loc] - def __str__(self): + def __str__( self ): try: return super(CharsNotIn, self).__str__() except Exception: @@ -3418,7 +3016,6 @@ def __str__(self): return self.strRepr - class White(Token): """ Special matching class for matching whitespace. 
Normally, whitespace is ignored
@@ -3427,23 +3024,19 @@ class White(Token):
     matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
     as defined for the C{L{Word}} class.
     """
-
     whiteStrs = {
-        " ": "<SPC>",
+        " " : "<SPC>",
         "\t": "<TAB>",
         "\n": "<LF>",
         "\r": "<CR>",
         "\f": "<FF>",
-    }
-
+        }
     def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
-        super(White, self).__init__()
+        super(White,self).__init__()
         self.matchWhite = ws
-        self.setWhitespaceChars(
-            "".join(c for c in self.whiteChars if c not in self.matchWhite)
-        )
-        # ~ self.leaveWhitespace()
-        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
+        self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
+        #~ self.leaveWhitespace()
+        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
         self.mayReturnEmpty = True
         self.errmsg = "Expected " + self.name

@@ -3458,13 +3051,13 @@ def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
             self.maxLen = exact
             self.minLen = exact

-    def parseImpl(self, instring, loc, doActions=True):
-        if not (instring[loc] in self.matchWhite):
+    def parseImpl( self, instring, loc, doActions=True ):
+        if not(instring[ loc ] in self.matchWhite):
             raise ParseException(instring, loc, self.errmsg, self)
         start = loc
         loc += 1
         maxloc = start + self.maxLen
-        maxloc = min(maxloc, len(instring))
+        maxloc = min( maxloc, len(instring) )
         while loc < maxloc and instring[loc] in self.matchWhite:
             loc += 1

@@ -3475,50 +3068,44 @@ def parseImpl(self, instring, loc, doActions=True):


 class _PositionToken(Token):
-    def __init__(self):
-        super(_PositionToken, self).__init__()
-        self.name = self.__class__.__name__
+    def __init__( self ):
+        super(_PositionToken,self).__init__()
+        self.name=self.__class__.__name__
         self.mayReturnEmpty = True
         self.mayIndexError = False

-
 class GoToColumn(_PositionToken):
     """
     Token to advance to a specific column of input text; useful for tabular report scraping.
""" - - def __init__(self, colno): - super(GoToColumn, self).__init__() + def __init__( self, colno ): + super(GoToColumn,self).__init__() self.col = colno - def preParse(self, instring, loc): - if col(loc, instring) != self.col: + def preParse( self, instring, loc ): + if col(loc,instring) != self.col: instrlen = len(instring) if self.ignoreExprs: - loc = self._skipIgnorables(instring, loc) - while ( - loc < instrlen - and instring[loc].isspace() - and col(loc, instring) != self.col - ): + loc = self._skipIgnorables( instring, loc ) + while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : loc += 1 return loc - def parseImpl(self, instring, loc, doActions=True): - thiscol = col(loc, instring) + def parseImpl( self, instring, loc, doActions=True ): + thiscol = col( loc, instring ) if thiscol > self.col: - raise ParseException(instring, loc, "Text not in expected column", self) + raise ParseException( instring, loc, "Text not in expected column", self ) newloc = loc + self.col - thiscol - ret = instring[loc:newloc] + ret = instring[ loc: newloc ] return newloc, ret class LineStart(_PositionToken): """ Matches if current position is at the beginning of a line within the parse string - + Example:: - + test = '''\ AAA this line AAA and this line @@ -3528,82 +3115,74 @@ class LineStart(_PositionToken): for t in (LineStart() + 'AAA' + restOfLine).searchString(test): print(t) - + Prints:: ['AAA', ' this line'] - ['AAA', ' and this line'] + ['AAA', ' and this line'] """ - - def __init__(self): - super(LineStart, self).__init__() + def __init__( self ): + super(LineStart,self).__init__() self.errmsg = "Expected start of line" - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): if col(loc, instring) == 1: return loc, [] raise ParseException(instring, loc, self.errmsg, self) - class LineEnd(_PositionToken): """ Matches if current position is at the end of a line within the parse string """ - - def __init__(self): - super(LineEnd, self).__init__() - self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")) + def __init__( self ): + super(LineEnd,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) self.errmsg = "Expected end of line" - def parseImpl(self, instring, loc, doActions=True): - if loc < len(instring): + def parseImpl( self, instring, loc, doActions=True ): + if loc len(instring): return loc, [] else: raise ParseException(instring, loc, self.errmsg, self) - class WordStart(_PositionToken): """ Matches if the current position is at the beginning of a Word, and @@ -3612,22 +3191,18 @@ class WordStart(_PositionToken): use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of the string being parsed, or at the beginning of a line. 
""" - - def __init__(self, wordChars=printables): - super(WordStart, self).__init__() + def __init__(self, wordChars = printables): + super(WordStart,self).__init__() self.wordChars = set(wordChars) self.errmsg = "Not at the start of a word" - def parseImpl(self, instring, loc, doActions=True): + def parseImpl(self, instring, loc, doActions=True ): if loc != 0: - if ( - instring[loc - 1] in self.wordChars - or instring[loc] not in self.wordChars - ): + if (instring[loc-1] in self.wordChars or + instring[loc] not in self.wordChars): raise ParseException(instring, loc, self.errmsg, self) return loc, [] - class WordEnd(_PositionToken): """ Matches if the current position is at the end of a Word, and @@ -3636,20 +3211,17 @@ class WordEnd(_PositionToken): use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of the string being parsed, or at the end of a line. """ - - def __init__(self, wordChars=printables): - super(WordEnd, self).__init__() + def __init__(self, wordChars = printables): + super(WordEnd,self).__init__() self.wordChars = set(wordChars) self.skipWhitespace = False self.errmsg = "Not at the end of a word" - def parseImpl(self, instring, loc, doActions=True): + def parseImpl(self, instring, loc, doActions=True ): instrlen = len(instring) - if instrlen > 0 and loc < instrlen: - if ( - instring[loc] in self.wordChars - or instring[loc - 1] not in self.wordChars - ): + if instrlen>0 and loc maxExcLoc: @@ -3888,9 +3449,7 @@ def parseImpl(self, instring, loc, doActions=True): maxExcLoc = err.loc except IndexError: if len(instring) > maxExcLoc: - maxException = ParseException( - instring, len(instring), e.errmsg, self - ) + maxException = ParseException(instring,len(instring),e.errmsg,self) maxExcLoc = len(instring) else: # save match among all matches, to retry longest to shortest @@ -3898,9 +3457,9 @@ def parseImpl(self, instring, loc, doActions=True): if matches: matches.sort(key=lambda x: -x[0]) - for _, e in matches: + for _,e in matches: try: - return e._parse(instring, loc, doActions) + return e._parse( instring, loc, doActions ) except ParseException as err: err.__traceback__ = None if err.loc > maxExcLoc: @@ -3911,17 +3470,16 @@ def parseImpl(self, instring, loc, doActions=True): maxException.msg = self.errmsg raise maxException else: - raise ParseException( - instring, loc, "no defined alternatives to match", self - ) + raise ParseException(instring, loc, "no defined alternatives to match", self) - def __ixor__(self, other): - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - return self.append(other) # Or( [ self, other ] ) - def __str__(self): - if hasattr(self, "name"): + def __ixor__(self, other ): + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + return self.append( other ) #Or( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): return self.name if self.strRepr is None: @@ -3929,10 +3487,10 @@ def __str__(self): return self.strRepr - def checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] for e in self.exprs: - e.checkRecursion(subRecCheckList) + e.checkRecursion( subRecCheckList ) class MatchFirst(ParseExpression): @@ -3943,7 +3501,7 @@ class MatchFirst(ParseExpression): Example:: # construct MatchFirst using '|' operator - + # watch the order of expressions to match number = Word(nums) | Combine(Word(nums) + '.' 
+ Word(nums)) print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] @@ -3952,20 +3510,19 @@ class MatchFirst(ParseExpression): number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] """ - - def __init__(self, exprs, savelist=False): - super(MatchFirst, self).__init__(exprs, savelist) + def __init__( self, exprs, savelist = False ): + super(MatchFirst,self).__init__(exprs, savelist) if self.exprs: self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) else: self.mayReturnEmpty = True - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): maxExcLoc = -1 maxException = None for e in self.exprs: try: - ret = e._parse(instring, loc, doActions) + ret = e._parse( instring, loc, doActions ) return ret except ParseException as err: if err.loc > maxExcLoc: @@ -3973,9 +3530,7 @@ def parseImpl(self, instring, loc, doActions=True): maxExcLoc = err.loc except IndexError: if len(instring) > maxExcLoc: - maxException = ParseException( - instring, len(instring), e.errmsg, self - ) + maxException = ParseException(instring,len(instring),e.errmsg,self) maxExcLoc = len(instring) # only got here if no expression matched, raise exception for match that made it the furthest @@ -3984,17 +3539,15 @@ def parseImpl(self, instring, loc, doActions=True): maxException.msg = self.errmsg raise maxException else: - raise ParseException( - instring, loc, "no defined alternatives to match", self - ) + raise ParseException(instring, loc, "no defined alternatives to match", self) - def __ior__(self, other): - if isinstance(other, basestring): - other = ParserElement._literalStringClass(other) - return self.append(other) # MatchFirst( [ self, other ] ) + def __ior__(self, other ): + if isinstance( other, basestring ): + other = ParserElement._literalStringClass( other ) + return self.append( other ) #MatchFirst( [ self, other ] ) - def __str__(self): - if hasattr(self, "name"): + def __str__( self ): + if hasattr(self,"name"): return self.name if self.strRepr is None: @@ -4002,10 +3555,10 @@ def __str__(self): return self.strRepr - def checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] for e in self.exprs: - e.checkRecursion(subRecCheckList) + e.checkRecursion( subRecCheckList ) class Each(ParseExpression): @@ -4023,7 +3576,7 @@ class Each(ParseExpression): color_attr = "color:" + color("color") size_attr = "size:" + integer("size") - # use Each (using operator '&') to accept attributes in any order + # use Each (using operator '&') to accept attributes in any order # (shape and posn are required, color and size are optional) shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) @@ -4062,41 +3615,26 @@ class Each(ParseExpression): - shape: TRIANGLE - size: 20 """ - - def __init__(self, exprs, savelist=True): - super(Each, self).__init__(exprs, savelist) + def __init__( self, exprs, savelist = True ): + super(Each,self).__init__(exprs, savelist) self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) self.skipWhitespace = True self.initExprGroups = True - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): if self.initExprGroups: - self.opt1map = dict( - (id(e.expr), e) for e in self.exprs if isinstance(e, 
Optional) - ) - opt1 = [e.expr for e in self.exprs if isinstance(e, Optional)] - opt2 = [ - e - for e in self.exprs - if e.mayReturnEmpty and not isinstance(e, Optional) - ] + self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) + opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] + opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] self.optionals = opt1 + opt2 - self.multioptionals = [ - e.expr for e in self.exprs if isinstance(e, ZeroOrMore) - ] - self.multirequired = [ - e.expr for e in self.exprs if isinstance(e, OneOrMore) - ] - self.required = [ - e - for e in self.exprs - if not isinstance(e, (Optional, ZeroOrMore, OneOrMore)) - ] + self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] + self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] + self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] self.required += self.multirequired self.initExprGroups = False tmpLoc = loc tmpReqd = self.required[:] - tmpOpt = self.optionals[:] + tmpOpt = self.optionals[:] matchOrder = [] keepMatching = True @@ -4105,11 +3643,11 @@ def parseImpl(self, instring, loc, doActions=True): failed = [] for e in tmpExprs: try: - tmpLoc = e.tryParse(instring, tmpLoc) + tmpLoc = e.tryParse( instring, tmpLoc ) except ParseException: failed.append(e) else: - matchOrder.append(self.opt1map.get(id(e), e)) + matchOrder.append(self.opt1map.get(id(e),e)) if e in tmpReqd: tmpReqd.remove(e) elif e in tmpOpt: @@ -4119,25 +3657,21 @@ def parseImpl(self, instring, loc, doActions=True): if tmpReqd: missing = ", ".join(_ustr(e) for e in tmpReqd) - raise ParseException( - instring, loc, "Missing one or more required elements (%s)" % missing - ) + raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) # add any unmatched Optionals, in case they have default values defined - matchOrder += [ - e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt - ] + matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] resultlist = [] for e in matchOrder: - loc, results = e._parse(instring, loc, doActions) + loc,results = e._parse(instring,loc,doActions) resultlist.append(results) finalResults = sum(resultlist, ParseResults([])) return loc, finalResults - def __str__(self): - if hasattr(self, "name"): + def __str__( self ): + if hasattr(self,"name"): return self.name if self.strRepr is None: @@ -4145,20 +3679,19 @@ def __str__(self): return self.strRepr - def checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] for e in self.exprs: - e.checkRecursion(subRecCheckList) + e.checkRecursion( subRecCheckList ) class ParseElementEnhance(ParserElement): """ Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens. 
""" - - def __init__(self, expr, savelist=False): - super(ParseElementEnhance, self).__init__(savelist) - if isinstance(expr, basestring): + def __init__( self, expr, savelist=False ): + super(ParseElementEnhance,self).__init__(savelist) + if isinstance( expr, basestring ): if issubclass(ParserElement._literalStringClass, Token): expr = ParserElement._literalStringClass(expr) else: @@ -4168,64 +3701,64 @@ def __init__(self, expr, savelist=False): if expr is not None: self.mayIndexError = expr.mayIndexError self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars(expr.whiteChars) + self.setWhitespaceChars( expr.whiteChars ) self.skipWhitespace = expr.skipWhitespace self.saveAsList = expr.saveAsList self.callPreparse = expr.callPreparse self.ignoreExprs.extend(expr.ignoreExprs) - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): if self.expr is not None: - return self.expr._parse(instring, loc, doActions, callPreParse=False) + return self.expr._parse( instring, loc, doActions, callPreParse=False ) else: - raise ParseException("", loc, self.errmsg, self) + raise ParseException("",loc,self.errmsg,self) - def leaveWhitespace(self): + def leaveWhitespace( self ): self.skipWhitespace = False self.expr = self.expr.copy() if self.expr is not None: self.expr.leaveWhitespace() return self - def ignore(self, other): - if isinstance(other, Suppress): + def ignore( self, other ): + if isinstance( other, Suppress ): if other not in self.ignoreExprs: - super(ParseElementEnhance, self).ignore(other) + super( ParseElementEnhance, self).ignore( other ) if self.expr is not None: - self.expr.ignore(self.ignoreExprs[-1]) + self.expr.ignore( self.ignoreExprs[-1] ) else: - super(ParseElementEnhance, self).ignore(other) + super( ParseElementEnhance, self).ignore( other ) if self.expr is not None: - self.expr.ignore(self.ignoreExprs[-1]) + self.expr.ignore( self.ignoreExprs[-1] ) return self - def streamline(self): - super(ParseElementEnhance, self).streamline() + def streamline( self ): + super(ParseElementEnhance,self).streamline() if self.expr is not None: self.expr.streamline() return self - def checkRecursion(self, parseElementList): + def checkRecursion( self, parseElementList ): if self in parseElementList: - raise RecursiveGrammarException(parseElementList + [self]) - subRecCheckList = parseElementList[:] + [self] + raise RecursiveGrammarException( parseElementList+[self] ) + subRecCheckList = parseElementList[:] + [ self ] if self.expr is not None: - self.expr.checkRecursion(subRecCheckList) + self.expr.checkRecursion( subRecCheckList ) - def validate(self, validateTrace=[]): - tmp = validateTrace[:] + [self] + def validate( self, validateTrace=[] ): + tmp = validateTrace[:]+[self] if self.expr is not None: self.expr.validate(tmp) - self.checkRecursion([]) + self.checkRecursion( [] ) - def __str__(self): + def __str__( self ): try: - return super(ParseElementEnhance, self).__str__() + return super(ParseElementEnhance,self).__str__() except Exception: pass if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr)) + self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) return self.strRepr @@ -4241,18 +3774,17 @@ class FollowedBy(ParseElementEnhance): data_word = Word(alphas) label = data_word + FollowedBy(':') attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - + OneOrMore(attr_expr).parseString("shape: 
SQUARE color: BLACK posn: upper left").pprint() prints:: [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] """ - - def __init__(self, expr): - super(FollowedBy, self).__init__(expr) + def __init__( self, expr ): + super(FollowedBy,self).__init__(expr) self.mayReturnEmpty = True - def parseImpl(self, instring, loc, doActions=True): - self.expr.tryParse(instring, loc) + def parseImpl( self, instring, loc, doActions=True ): + self.expr.tryParse( instring, loc ) return loc, [] @@ -4265,25 +3797,22 @@ class NotAny(ParseElementEnhance): always returns a null token list. May be constructed using the '~' operator. Example:: - + """ - - def __init__(self, expr): - super(NotAny, self).__init__(expr) - # ~ self.leaveWhitespace() - self.skipWhitespace = ( - False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs - ) + def __init__( self, expr ): + super(NotAny,self).__init__(expr) + #~ self.leaveWhitespace() + self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, " + _ustr(self.expr) + self.errmsg = "Found unwanted token, "+_ustr(self.expr) - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): if self.expr.canParseNext(instring, loc): raise ParseException(instring, loc, self.errmsg, self) return loc, [] - def __str__(self): - if hasattr(self, "name"): + def __str__( self ): + if hasattr(self,"name"): return self.name if self.strRepr is None: @@ -4291,9 +3820,8 @@ def __str__(self): return self.strRepr - class _MultipleMatch(ParseElementEnhance): - def __init__(self, expr, stopOn=None): + def __init__( self, expr, stopOn=None): super(_MultipleMatch, self).__init__(expr) self.saveAsList = True ender = stopOn @@ -4301,45 +3829,44 @@ def __init__(self, expr, stopOn=None): ender = ParserElement._literalStringClass(ender) self.not_ender = ~ender if ender is not None else None - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): self_expr_parse = self.expr._parse self_skip_ignorables = self._skipIgnorables check_ender = self.not_ender is not None if check_ender: try_not_ender = self.not_ender.tryParse - + # must be at least one (but first see if we are the stopOn sentinel; # if so, fail) if check_ender: try_not_ender(instring, loc) - loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False) + loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False ) try: - hasIgnoreExprs = not not self.ignoreExprs + hasIgnoreExprs = (not not self.ignoreExprs) while 1: if check_ender: try_not_ender(instring, loc) if hasIgnoreExprs: - preloc = self_skip_ignorables(instring, loc) + preloc = self_skip_ignorables( instring, loc ) else: preloc = loc - loc, tmptokens = self_expr_parse(instring, preloc, doActions) + loc, tmptokens = self_expr_parse( instring, preloc, doActions ) if tmptokens or tmptokens.haskeys(): tokens += tmptokens - except (ParseException, IndexError): + except (ParseException,IndexError): pass return loc, tokens - - + class OneOrMore(_MultipleMatch): """ Repetition of one or more of the given expression. 
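    A quick sketch of the basic repetition behavior (assuming C{pyparsing} is importable; output noted in the comment)::

        from pyparsing import OneOrMore, Word, alphas

        words = OneOrMore(Word(alphas))
        print(words.parseString("lazy brown dog"))  # -> ['lazy', 'brown', 'dog']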
- + Parameters: - expr - expression that must match one or more times - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) + (only required if the sentinel would ordinarily match the repetition + expression) Example:: data_word = Word(alphas) @@ -4352,13 +3879,13 @@ class OneOrMore(_MultipleMatch): # use stopOn attribute for OneOrMore to avoid reading label string as part of the data attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] - + # could also be written as (attr_expr * (1,)).parseString(text).pprint() """ - def __str__(self): - if hasattr(self, "name"): + def __str__( self ): + if hasattr(self,"name"): return self.name if self.strRepr is None: @@ -4366,32 +3893,30 @@ def __str__(self): return self.strRepr - class ZeroOrMore(_MultipleMatch): """ Optional repetition of zero or more of the given expression. - + Parameters: - expr - expression that must match zero or more times - stopOn - (default=C{None}) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) + (only required if the sentinel would ordinarily match the repetition + expression) Example: similar to L{OneOrMore} """ - - def __init__(self, expr, stopOn=None): - super(ZeroOrMore, self).__init__(expr, stopOn=stopOn) + def __init__( self, expr, stopOn=None): + super(ZeroOrMore,self).__init__(expr, stopOn=stopOn) self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): + + def parseImpl( self, instring, loc, doActions=True ): try: return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) - except (ParseException, IndexError): + except (ParseException,IndexError): return loc, [] - def __str__(self): - if hasattr(self, "name"): + def __str__( self ): + if hasattr(self,"name"): return self.name if self.strRepr is None: @@ -4399,20 +3924,14 @@ def __str__(self): return self.strRepr - class _NullToken(object): def __bool__(self): return False - __nonzero__ = __bool__ - def __str__(self): return "" - _optionalNotMatched = _NullToken() - - class Optional(ParseElementEnhance): """ Optional matching of the given expression. 
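    A sketch of C{Optional} with a default value (assuming C{pyparsing} is importable; outputs noted in comments)::

        from pyparsing import Optional, Word, nums

        expr = Word(nums) + Optional("-" + Word(nums), default="0000")
        print(expr.parseString("12345"))       # -> ['12345', '0000']
        print(expr.parseString("12345-6789"))  # -> ['12345', '-', '6789']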
@@ -4427,10 +3946,10 @@ class Optional(ParseElementEnhance): zip.runTests(''' # traditional ZIP code 12345 - + # ZIP+4 form 12101-0001 - + # invalid ZIP 98765- ''') @@ -4448,29 +3967,28 @@ class Optional(ParseElementEnhance): ^ FAIL: Expected end of text (at char 5), (line:1, col:6) """ - - def __init__(self, expr, default=_optionalNotMatched): - super(Optional, self).__init__(expr, savelist=False) + def __init__( self, expr, default=_optionalNotMatched ): + super(Optional,self).__init__( expr, savelist=False ) self.saveAsList = self.expr.saveAsList self.defaultValue = default self.mayReturnEmpty = True - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): try: - loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) - except (ParseException, IndexError): + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + except (ParseException,IndexError): if self.defaultValue is not _optionalNotMatched: if self.expr.resultsName: - tokens = ParseResults([self.defaultValue]) + tokens = ParseResults([ self.defaultValue ]) tokens[self.expr.resultsName] = self.defaultValue else: - tokens = [self.defaultValue] + tokens = [ self.defaultValue ] else: tokens = [] return loc, tokens - def __str__(self): - if hasattr(self, "name"): + def __str__( self ): + if hasattr(self,"name"): return self.name if self.strRepr is None: @@ -4478,19 +3996,18 @@ def __str__(self): return self.strRepr - class SkipTo(ParseElementEnhance): """ Token for skipping over all undefined text until the matched expression is found. Parameters: - expr - target expression marking the end of the data to be skipped - - include - (default=C{False}) if True, the target expression is also parsed + - include - (default=C{False}) if True, the target expression is also parsed (the skipped text and target expression are returned as a 2-element list). 
- - ignore - (default=C{None}) used to define grammars (typically quoted strings and + - ignore - (default=C{None}) used to define grammars (typically quoted strings and comments) that might contain false matches to the target expression - - failOn - (default=C{None}) define expressions that are not allowed to be - included in the skipped test; if found before the target expression is found, + - failOn - (default=C{None}) define expressions that are not allowed to be + included in the skipped test; if found before the target expression is found, the SkipTo is not a match Example:: @@ -4510,11 +4027,11 @@ class SkipTo(ParseElementEnhance): # - parse action will call token.strip() for each matched token, i.e., the description body string_data = SkipTo(SEP, ignore=quotedString) string_data.setParseAction(tokenMap(str.strip)) - ticket_expr = (integer("issue_num") + SEP - + string_data("sev") + SEP - + string_data("desc") + SEP + ticket_expr = (integer("issue_num") + SEP + + string_data("sev") + SEP + + string_data("desc") + SEP + integer("days_open")) - + for tkt in ticket_expr.searchString(report): print tkt.dump() prints:: @@ -4534,9 +4051,8 @@ class SkipTo(ParseElementEnhance): - issue_num: 79 - sev: Minor """ - - def __init__(self, other, include=False, ignore=None, failOn=None): - super(SkipTo, self).__init__(other) + def __init__( self, other, include=False, ignore=None, failOn=None ): + super( SkipTo, self ).__init__( other ) self.ignoreExpr = ignore self.mayReturnEmpty = True self.mayIndexError = False @@ -4546,27 +4062,23 @@ def __init__(self, other, include=False, ignore=None, failOn=None): self.failOn = ParserElement._literalStringClass(failOn) else: self.failOn = failOn - self.errmsg = "No match found for " + _ustr(self.expr) + self.errmsg = "No match found for "+_ustr(self.expr) - def parseImpl(self, instring, loc, doActions=True): + def parseImpl( self, instring, loc, doActions=True ): startloc = loc instrlen = len(instring) expr = self.expr expr_parse = self.expr._parse - self_failOn_canParseNext = ( - self.failOn.canParseNext if self.failOn is not None else None - ) - self_ignoreExpr_tryParse = ( - self.ignoreExpr.tryParse if self.ignoreExpr is not None else None - ) - + self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None + self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None + tmploc = loc while tmploc <= instrlen: if self_failOn_canParseNext is not None: # break if failOn expression matches if self_failOn_canParseNext(instring, tmploc): break - + if self_ignoreExpr_tryParse is not None: # advance past ignore expressions while 1: @@ -4574,7 +4086,7 @@ def parseImpl(self, instring, loc, doActions=True): tmploc = self_ignoreExpr_tryParse(instring, tmploc) except ParseBaseException: break - + try: expr_parse(instring, tmploc, doActions=False, callPreParse=False) except (ParseException, IndexError): @@ -4592,14 +4104,13 @@ def parseImpl(self, instring, loc, doActions=True): loc = tmploc skiptext = instring[startloc:loc] skipresult = ParseResults(skiptext) - + if self.includeMatch: - loc, mat = expr_parse(instring, loc, doActions, callPreParse=False) + loc, mat = expr_parse(instring,loc,doActions,callPreParse=False) skipresult += mat return loc, skipresult - class Forward(ParseElementEnhance): """ Forward declaration of an expression to be defined later - @@ -4619,46 +4130,45 @@ class Forward(ParseElementEnhance): See L{ParseResults.pprint} for an example of a recursive parser created using C{Forward}. 
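    A small recursive-grammar sketch using C{Forward} (assuming C{pyparsing} is importable; the nested-list output is noted in the comment)::

        from pyparsing import Forward, Group, Suppress, Word, ZeroOrMore, nums

        expr = Forward()
        atom = Word(nums) | Group(Suppress("(") + ZeroOrMore(expr) + Suppress(")"))
        expr <<= atom
        print(expr.parseString("(1 (2 3) 4)"))  # -> [['1', ['2', '3'], '4']]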
""" + def __init__( self, other=None ): + super(Forward,self).__init__( other, savelist=False ) - def __init__(self, other=None): - super(Forward, self).__init__(other, savelist=False) - - def __lshift__(self, other): - if isinstance(other, basestring): + def __lshift__( self, other ): + if isinstance( other, basestring ): other = ParserElement._literalStringClass(other) self.expr = other self.strRepr = None self.mayIndexError = self.expr.mayIndexError self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars(self.expr.whiteChars) + self.setWhitespaceChars( self.expr.whiteChars ) self.skipWhitespace = self.expr.skipWhitespace self.saveAsList = self.expr.saveAsList self.ignoreExprs.extend(self.expr.ignoreExprs) return self - + def __ilshift__(self, other): return self << other - - def leaveWhitespace(self): + + def leaveWhitespace( self ): self.skipWhitespace = False return self - def streamline(self): + def streamline( self ): if not self.streamlined: self.streamlined = True if self.expr is not None: self.expr.streamline() return self - def validate(self, validateTrace=[]): + def validate( self, validateTrace=[] ): if self not in validateTrace: - tmp = validateTrace[:] + [self] + tmp = validateTrace[:]+[self] if self.expr is not None: self.expr.validate(tmp) self.checkRecursion([]) - def __str__(self): - if hasattr(self, "name"): + def __str__( self ): + if hasattr(self,"name"): return self.name return self.__class__.__name__ + ": ..." @@ -4676,28 +4186,24 @@ def __str__(self): def copy(self): if self.expr is not None: - return super(Forward, self).copy() + return super(Forward,self).copy() else: ret = Forward() ret <<= self return ret - class _ForwardNoRecurse(Forward): - def __str__(self): + def __str__( self ): return "..." - class TokenConverter(ParseElementEnhance): """ Abstract subclass of C{ParseExpression}, for converting parsed results. """ - - def __init__(self, expr, savelist=False): - super(TokenConverter, self).__init__(expr) # , savelist ) + def __init__( self, expr, savelist=False ): + super(TokenConverter,self).__init__( expr )#, savelist ) self.saveAsList = False - class Combine(TokenConverter): """ Converter to concatenate all matching tokens to a single string. @@ -4715,9 +4221,8 @@ class Combine(TokenConverter): # no match when there are internal spaces print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) 
""" - - def __init__(self, expr, joinString="", adjacent=True): - super(Combine, self).__init__(expr) + def __init__( self, expr, joinString="", adjacent=True ): + super(Combine,self).__init__( expr ) # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself if adjacent: self.leaveWhitespace() @@ -4726,26 +4231,23 @@ def __init__(self, expr, joinString="", adjacent=True): self.joinString = joinString self.callPreparse = True - def ignore(self, other): + def ignore( self, other ): if self.adjacent: ParserElement.ignore(self, other) else: - super(Combine, self).ignore(other) + super( Combine, self).ignore( other ) return self - def postParse(self, instring, loc, tokenlist): + def postParse( self, instring, loc, tokenlist ): retToks = tokenlist.copy() del retToks[:] - retToks += ParseResults( - ["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults - ) + retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) if self.resultsName and retToks.haskeys(): - return [retToks] + return [ retToks ] else: return retToks - class Group(TokenConverter): """ Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions. @@ -4760,14 +4262,12 @@ class Group(TokenConverter): func = ident + Group(Optional(delimitedList(term))) print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']] """ - - def __init__(self, expr): - super(Group, self).__init__(expr) + def __init__( self, expr ): + super(Group,self).__init__( expr ) self.saveAsList = True - def postParse(self, instring, loc, tokenlist): - return [tokenlist] - + def postParse( self, instring, loc, tokenlist ): + return [ tokenlist ] class Dict(TokenConverter): """ @@ -4782,16 +4282,16 @@ class Dict(TokenConverter): text = "shape: SQUARE posn: upper left color: light blue texture: burlap" attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - + # print attributes as plain groups print(OneOrMore(attr_expr).parseString(text).dump()) - + # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names result = Dict(OneOrMore(Group(attr_expr))).parseString(text) print(result.dump()) - + # access named fields as dict entries, or output as dict - print(result['shape']) + print(result['shape']) print(result.asDict()) prints:: ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] @@ -4805,34 +4305,31 @@ class Dict(TokenConverter): {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} See more examples at L{ParseResults} of accessing fields by results name. 
""" - - def __init__(self, expr): - super(Dict, self).__init__(expr) + def __init__( self, expr ): + super(Dict,self).__init__( expr ) self.saveAsList = True - def postParse(self, instring, loc, tokenlist): - for i, tok in enumerate(tokenlist): + def postParse( self, instring, loc, tokenlist ): + for i,tok in enumerate(tokenlist): if len(tok) == 0: continue ikey = tok[0] - if isinstance(ikey, int): + if isinstance(ikey,int): ikey = _ustr(tok[0]).strip() - if len(tok) == 1: - tokenlist[ikey] = _ParseResultsWithOffset("", i) - elif len(tok) == 2 and not isinstance(tok[1], ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) + if len(tok)==1: + tokenlist[ikey] = _ParseResultsWithOffset("",i) + elif len(tok)==2 and not isinstance(tok[1],ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) else: - dictvalue = tok.copy() # ParseResults(i) + dictvalue = tok.copy() #ParseResults(i) del dictvalue[0] - if len(dictvalue) != 1 or ( - isinstance(dictvalue, ParseResults) and dictvalue.haskeys() - ): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) + if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) if self.resultsName: - return [tokenlist] + return [ tokenlist ] else: return tokenlist @@ -4856,11 +4353,10 @@ class Suppress(TokenConverter): ['a', 'b', 'c', 'd'] (See also L{delimitedList}.) """ - - def postParse(self, instring, loc, tokenlist): + def postParse( self, instring, loc, tokenlist ): return [] - def suppress(self): + def suppress( self ): return self @@ -4868,26 +4364,22 @@ class OnlyOnce(object): """ Wrapper for parse actions, to ensure they are only called once. """ - def __init__(self, methodCall): self.callable = _trim_arity(methodCall) self.called = False - - def __call__(self, s, l, t): + def __call__(self,s,l,t): if not self.called: - results = self.callable(s, l, t) + results = self.callable(s,l,t) self.called = True return results - raise ParseException(s, l, "") - + raise ParseException(s,l,"") def reset(self): self.called = False - def traceParseAction(f): """ - Decorator for debugging parse actions. - + Decorator for debugging parse actions. + When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".} When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised. @@ -4906,34 +4398,29 @@ def remove_duplicate_chars(tokens): ['dfjkls'] """ f = _trim_arity(f) - def z(*paArgs): thisFunc = f.__name__ - s, l, t = paArgs[-3:] - if len(paArgs) > 3: - thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc - sys.stderr.write( - ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc, line(l, s), l, t) - ) + s,l,t = paArgs[-3:] + if len(paArgs)>3: + thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc + sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) ) try: ret = f(*paArgs) except Exception as exc: - sys.stderr.write("< ['aa', 'bb', 'cc'] delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] """ - dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..." + dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." 
if combine: - return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName) + return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) else: - return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName) + return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) - -def countedArray(expr, intExpr=None): +def countedArray( expr, intExpr=None ): """ Helper to define a counted list of expressions. This helper defines a pattern of the form:: integer expr expr expr... where the leading integer tells how many expr expressions follow. The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. - + If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value. Example:: @@ -4972,31 +4458,27 @@ def countedArray(expr, intExpr=None): countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] """ arrayExpr = Forward() - - def countFieldParseAction(s, l, t): + def countFieldParseAction(s,l,t): n = t[0] - arrayExpr << (n and Group(And([expr] * n)) or Group(empty)) + arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) return [] - if intExpr is None: - intExpr = Word(nums).setParseAction(lambda t: int(t[0])) + intExpr = Word(nums).setParseAction(lambda t:int(t[0])) else: intExpr = intExpr.copy() intExpr.setName("arrayLen") intExpr.addParseAction(countFieldParseAction, callDuringTry=True) - return (intExpr + arrayExpr).setName("(len) " + _ustr(expr) + "...") - + return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...') def _flatten(L): ret = [] for i in L: - if isinstance(i, list): + if isinstance(i,list): ret.extend(_flatten(i)) else: ret.append(i) return ret - def matchPreviousLiteral(expr): """ Helper to define an expression that is indirectly defined from @@ -5011,8 +4493,7 @@ def matchPreviousLiteral(expr): Do I{not} use with packrat parsing enabled. 
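    For example (a sketch, assuming C{pyparsing} is importable; the failure case is noted in the comment)::

        from pyparsing import Word, matchPreviousLiteral, nums

        first = Word(nums)
        matchExpr = first + ":" + matchPreviousLiteral(first)
        print(matchExpr.parseString("1:1"))  # -> ['1', ':', '1']
        # parsing "1:2" raises a ParseException: the repeat must re-match '1'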
""" rep = Forward() - - def copyTokenToRepeater(s, l, t): + def copyTokenToRepeater(s,l,t): if t: if len(t) == 1: rep << t[0] @@ -5022,12 +4503,10 @@ def copyTokenToRepeater(s, l, t): rep << And(Literal(tt) for tt in tflat) else: rep << Empty() - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName("(prev) " + _ustr(expr)) + rep.setName('(prev) ' + _ustr(expr)) return rep - def matchPreviousExpr(expr): """ Helper to define an expression that is indirectly defined from @@ -5045,32 +4524,26 @@ def matchPreviousExpr(expr): rep = Forward() e2 = expr.copy() rep <<= e2 - - def copyTokenToRepeater(s, l, t): + def copyTokenToRepeater(s,l,t): matchTokens = _flatten(t.asList()) - - def mustMatchTheseTokens(s, l, t): + def mustMatchTheseTokens(s,l,t): theseTokens = _flatten(t.asList()) - if theseTokens != matchTokens: - raise ParseException("", 0, "") - - rep.setParseAction(mustMatchTheseTokens, callDuringTry=True) - + if theseTokens != matchTokens: + raise ParseException("",0,"") + rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName("(prev) " + _ustr(expr)) + rep.setName('(prev) ' + _ustr(expr)) return rep - def _escapeRegexRangeChars(s): - # ~ escape these chars: ^-] + #~ escape these chars: ^-] for c in r"\^-]": - s = s.replace(c, _bslash + c) - s = s.replace("\n", r"\n") - s = s.replace("\t", r"\t") + s = s.replace(c,_bslash+c) + s = s.replace("\n",r"\n") + s = s.replace("\t",r"\t") return _ustr(s) - -def oneOf(strs, caseless=False, useRegex=True): +def oneOf( strs, caseless=False, useRegex=True ): """ Helper to quickly define a set of alternative Literals, and makes sure to do longest-first testing when there is a conflict, regardless of the input order, @@ -5094,68 +4567,56 @@ def oneOf(strs, caseless=False, useRegex=True): [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] """ if caseless: - isequal = lambda a, b: a.upper() == b.upper() - masks = lambda a, b: b.upper().startswith(a.upper()) + isequal = ( lambda a,b: a.upper() == b.upper() ) + masks = ( lambda a,b: b.upper().startswith(a.upper()) ) parseElementClass = CaselessLiteral else: - isequal = lambda a, b: a == b - masks = lambda a, b: b.startswith(a) + isequal = ( lambda a,b: a == b ) + masks = ( lambda a,b: b.startswith(a) ) parseElementClass = Literal symbols = [] - if isinstance(strs, basestring): + if isinstance(strs,basestring): symbols = strs.split() elif isinstance(strs, collections.Iterable): symbols = list(strs) else: - warnings.warn( - "Invalid argument to oneOf, expected string or iterable", - SyntaxWarning, - stacklevel=2, - ) + warnings.warn("Invalid argument to oneOf, expected string or iterable", + SyntaxWarning, stacklevel=2) if not symbols: return NoMatch() i = 0 - while i < len(symbols) - 1: + while i < len(symbols)-1: cur = symbols[i] - for j, other in enumerate(symbols[i + 1 :]): - if isequal(other, cur): - del symbols[i + j + 1] + for j,other in enumerate(symbols[i+1:]): + if ( isequal(other, cur) ): + del symbols[i+j+1] break - elif masks(cur, other): - del symbols[i + j + 1] - symbols.insert(i, other) + elif ( masks(cur, other) ): + del symbols[i+j+1] + symbols.insert(i,other) cur = other break else: i += 1 if not caseless and useRegex: - # ~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) + #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) try: - if len(symbols) == len("".join(symbols)): - return Regex( - "[%s]" % 
"".join(_escapeRegexRangeChars(sym) for sym in symbols) - ).setName(" | ".join(symbols)) + if len(symbols)==len("".join(symbols)): + return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols)) else: - return Regex("|".join(re.escape(sym) for sym in symbols)).setName( - " | ".join(symbols) - ) + return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols)) except Exception: - warnings.warn( - "Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, - stacklevel=2, - ) + warnings.warn("Exception creating Regex for oneOf, building MatchFirst", + SyntaxWarning, stacklevel=2) - # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).setName( - " | ".join(symbols) - ) + # last resort, just use MatchFirst + return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) -def dictOf(key, value): +def dictOf( key, value ): """ Helper to easily and clearly define a dictionary by specifying the respective patterns for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens @@ -5168,7 +4629,7 @@ def dictOf(key, value): text = "shape: SQUARE posn: upper left color: light blue texture: burlap" attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) print(OneOrMore(attr_expr).parseString(text).dump()) - + attr_label = label attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) @@ -5188,19 +4649,18 @@ def dictOf(key, value): SQUARE {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} """ - return Dict(ZeroOrMore(Group(key + value))) - + return Dict( ZeroOrMore( Group ( key + value ) ) ) def originalTextFor(expr, asString=True): """ Helper to return the original, untokenized text for a given expression. Useful to restore the parsed fields of an HTML start tag into the raw tag text itself, or to revert separate tokens with intervening whitespace back to the original matching - input text. By default, returns astring containing the original parsed text. - - If the optional C{asString} argument is passed as C{False}, then the return value is a - C{L{ParseResults}} containing any results names that were originally matched, and a - single token containing the original matched text from the input string. So if + input text. By default, returns astring containing the original parsed text. + + If the optional C{asString} argument is passed as C{False}, then the return value is a + C{L{ParseResults}} containing any results names that were originally matched, and a + single token containing the original matched text from the input string. So if the expression passed to C{L{originalTextFor}} contains expressions with defined results names, you must set C{asString} to C{False} if you want to preserve those results name values. 
@@ -5215,29 +4675,25 @@ def originalTextFor(expr, asString=True): ['<b> bold <i>text</i> </b>'] ['<i>text</i>'] """ - locMarker = Empty().setParseAction(lambda s, loc, t: loc) + locMarker = Empty().setParseAction(lambda s,loc,t: loc) endlocMarker = locMarker.copy() endlocMarker.callPreparse = False matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") if asString: - extractText = lambda s, l, t: s[t._original_start : t._original_end] + extractText = lambda s,l,t: s[t._original_start:t._original_end] else: - - def extractText(s, l, t): - t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]] - + def extractText(s,l,t): + t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]] matchExpr.setParseAction(extractText) matchExpr.ignoreExprs = expr.ignoreExprs return matchExpr - -def ungroup(expr): +def ungroup(expr): """ Helper to undo pyparsing's default grouping of And expressions, even if all but one are non-empty. """ - return TokenConverter(expr).setParseAction(lambda t: t[0]) - + return TokenConverter(expr).setParseAction(lambda t:t[0]) def locatedExpr(expr): """ @@ -5259,45 +4715,23 @@ def locatedExpr(expr): [[8, 'lksdjjf', 15]] [[18, 'lkkjj', 23]] """ - locator = Empty().setParseAction(lambda s, l, t: l) - return Group( - locator("locn_start") - + expr("value") - + locator.copy().leaveWhitespace()("locn_end") - ) + locator = Empty().setParseAction(lambda s,l,t: l) + return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) # convenience constants for positional expressions -empty = Empty().setName("empty") -lineStart = LineStart().setName("lineStart") -lineEnd = LineEnd().setName("lineEnd") +empty = Empty().setName("empty") +lineStart = LineStart().setName("lineStart") +lineEnd = LineEnd().setName("lineEnd") stringStart = StringStart().setName("stringStart") -stringEnd = StringEnd().setName("stringEnd") - -_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction( - lambda s, l, t: t[0][1] -) -_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction( - lambda s, l, t: unichr(int(t[0].lstrip(r"\0x"), 16)) -) -_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction( - lambda s, l, t: unichr(int(t[0][1:], 8)) -) -_singleChar = ( - _escapedPunc - | _escapedHexChar - | _escapedOctChar - | Word(printables, excludeChars=r"\]", exact=1) - | Regex(r"\w", re.UNICODE) -) -_charRange = Group(_singleChar + Suppress("-") + _singleChar) -_reBracketExpr = ( - Literal("[") - + Optional("^").setResultsName("negate") - + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") - + "]" -) +stringEnd = StringEnd().setName("stringEnd") +_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) +_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16))) +_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) +_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE) +_charRange = Group(_singleChar + Suppress("-") + _singleChar) +_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" def srange(s): r""" @@ -5311,36 +4745,28 @@ def srange(s): The values enclosed in the []'s may be: - a single character - an escaped character with a leading backslash (such as C{\-} or C{\]}) - - an escaped hex character with a leading C{'\x'}
(C{\x21}, which is a C{'!'} character) - (C{\0x##} is also supported for backwards compatibility) + - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) + (C{\0x##} is also supported for backwards compatibility) - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character) - a range of any of the above, separated by a dash (C{'a-z'}, etc.) - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.) """ - _expanded = ( - lambda p: p - if not isinstance(p, ParseResults) - else "".join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) - ) + _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1)) try: return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) except Exception: return "" - def matchOnlyAtCol(n): """ Helper method for defining parse actions that require matching at a specific column in the input text. """ - - def verifyCol(strg, locn, toks): - if col(locn, strg) != n: - raise ParseException(strg, locn, "matched token not at column %d" % n) - + def verifyCol(strg,locn,toks): + if col(locn,strg) != n: + raise ParseException(strg,locn,"matched token not at column %d" % n) return verifyCol - def replaceWith(replStr): """ Helper method for common parse actions that simply return a literal value. Especially @@ -5350,13 +4776,12 @@ def replaceWith(replStr): num = Word(nums).setParseAction(lambda toks: int(toks[0])) na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) term = na | num - + OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] """ - return lambda s, l, t: [replStr] - + return lambda s,l,t: [replStr] -def removeQuotes(s, l, t): +def removeQuotes(s,l,t): """ Helper parse action for removing quotation marks from parsed quoted strings. @@ -5370,10 +4795,9 @@ def removeQuotes(s, l, t): """ return t[0][1:-1] - def tokenMap(func, *args): """ - Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional + Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional args are passed, they are forwarded to the given function as additional arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the parsed data to an integer using base 16. @@ -5383,7 +4807,7 @@ def tokenMap(func, *args): hex_ints.runTests(''' 00 11 22 aa FF 0a 0d 1a ''') - + upperword = Word(alphas).setParseAction(tokenMap(str.upper)) OneOrMore(upperword).runTests(''' my kingdom for a horse @@ -5403,80 +4827,53 @@ def tokenMap(func, *args): now is the winter of our discontent made glorious summer by this sun of york ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] """ - - def pa(s, l, t): + def pa(s,l,t): return [func(tokn, *args) for tokn in t] try: - func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) + func_name = getattr(func, '__name__', + getattr(func, '__class__').__name__) except Exception: func_name = str(func) pa.__name__ = func_name return pa - upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) """(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}""" downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) """(Deprecated) Helper parse action to convert tokens to lower case. 
Deprecated in favor of L{pyparsing_common.downcaseTokens}""" - - + def _makeTags(tagStr, xml): """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr, basestring): + if isinstance(tagStr,basestring): resname = tagStr tagStr = Keyword(tagStr, caseless=not xml) else: resname = tagStr.name - tagAttrName = Word(alphas, alphanums + "_-:") - if xml: - tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes) - openTag = ( - Suppress("<") - + tagStr("tag") - + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) - + Optional("/", default=[False]) - .setResultsName("empty") - .setParseAction(lambda s, l, t: t[0] == "/") - + Suppress(">") - ) + tagAttrName = Word(alphas,alphanums+"_-:") + if (xml): + tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) + openTag = Suppress("<") + tagStr("tag") + \ + Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") else: printablesLessRAbrack = "".join(c for c in printables if c not in ">") - tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word( - printablesLessRAbrack - ) - openTag = ( - Suppress("<") - + tagStr("tag") - + Dict( - ZeroOrMore( - Group( - tagAttrName.setParseAction(downcaseTokens) - + Optional(Suppress("=") + tagAttrValue) - ) - ) - ) - + Optional("/", default=[False]) - .setResultsName("empty") - .setParseAction(lambda s, l, t: t[0] == "/") - + Suppress(">") - ) + tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) + openTag = Suppress("<") + tagStr("tag") + \ + Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ + Optional( Suppress("=") + tagAttrValue ) ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") closeTag = Combine(_L("</") + tagStr + ">") - openTag = openTag.setResultsName( - "start" + "".join(resname.replace(":", " ").title().split()) - ).setName("<%s>" % resname) - closeTag = closeTag.setResultsName( - "end" + "".join(resname.replace(":", " ").title().split()) - ).setName("</%s>" % resname) + openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname) + closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname) openTag.tag = resname closeTag.tag = resname return openTag, closeTag - def makeHTMLTags(tagStr): """ Helper to construct opening and closing tag expressions for HTML, given a tag name.
Matches @@ -5487,15 +4884,14 @@ def makeHTMLTags(tagStr): # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple a,a_end = makeHTMLTags("A") link_expr = a + SkipTo(a_end)("link_text") + a_end - + for link in link_expr.searchString(text): # attributes in the tag (like "href" shown here) are also accessible as named results print(link.link_text, '->', link.href) prints:: pyparsing -> http://pyparsing.wikispaces.com """ - return _makeTags(tagStr, False) - + return _makeTags( tagStr, False ) def makeXMLTags(tagStr): """ @@ -5504,10 +4900,9 @@ def makeXMLTags(tagStr): Example: similar to L{makeHTMLTags} """ - return _makeTags(tagStr, True) - + return _makeTags( tagStr, True ) -def withAttribute(*args, **attrDict): +def withAttribute(*args,**attrDict): """ Helper to create a validating parse action to be used with start tags created with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag @@ -5522,7 +4917,7 @@ def withAttribute(*args, **attrDict): - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) For attribute names with a namespace prefix, you must use the second form. Attribute names are matched insensitive to upper/lower case. - + If just testing for C{class} (with or without a namespace), use C{L{withClass}}. To verify that the attribute exists, but without specifying a value, pass @@ -5536,7 +4931,7 @@ def withAttribute(*args, **attrDict):
<div type="graph">1,3 2,3 1,1</div>
<div>this has no type</div>
</div>
- + ''' div,div_end = makeHTMLTags("div") @@ -5545,7 +4940,7 @@ def withAttribute(*args, **attrDict): grid_expr = div_grid + SkipTo(div | div_end)("body") for grid_header in grid_expr.searchString(html): print(grid_header.body) - + # construct a match with any div tag having a type attribute, regardless of the value div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) div_expr = div_any_type + SkipTo(div | div_end)("body") @@ -5561,27 +4956,18 @@ def withAttribute(*args, **attrDict): attrs = args[:] else: attrs = attrDict.items() - attrs = [(k, v) for k, v in attrs] - - def pa(s, l, tokens): - for attrName, attrValue in attrs: + attrs = [(k,v) for k,v in attrs] + def pa(s,l,tokens): + for attrName,attrValue in attrs: if attrName not in tokens: - raise ParseException(s, l, "no matching attribute " + attrName) + raise ParseException(s,l,"no matching attribute " + attrName) if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException( - s, - l, - "attribute '%s' has value '%s', must be '%s'" - % (attrName, tokens[attrName], attrValue), - ) - + raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % + (attrName, tokens[attrName], attrValue)) return pa - - withAttribute.ANY_VALUE = object() - -def withClass(classname, namespace=""): +def withClass(classname, namespace=''): """ Simplified version of C{L{withAttribute}} when matching on a div class - made difficult because C{class} is a reserved word in Python. @@ -5594,15 +4980,15 @@ def withClass(classname, namespace=""):
<div class="graph">1,3 2,3 1,1</div>
<div>this &lt;div&gt; has no class</div>
</div>
- + ''' div,div_end = makeHTMLTags("div") div_grid = div().setParseAction(withClass("grid")) - + grid_expr = div_grid + SkipTo(div | div_end)("body") for grid_header in grid_expr.searchString(html): print(grid_header.body) - + div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) div_expr = div_any_type + SkipTo(div | div_end)("body") for div_header in div_expr.searchString(html): @@ -5614,22 +5000,20 @@ def withClass(classname, namespace=""): 1,3 2,3 1,1 """ classattr = "%s:class" % namespace if namespace else "class" - return withAttribute(**{classattr: classname}) - + return withAttribute(**{classattr : classname}) opAssoc = _Constants() opAssoc.LEFT = object() opAssoc.RIGHT = object() - -def infixNotation(baseExpr, opList, lpar=Suppress("("), rpar=Suppress(")")): +def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): """ Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached - to operator expressions. The generated parser will also recognize the use + to operator expressions. The generated parser will also recognize the use of parentheses to override operator precedences (see example below). - + Note: if you define a deep operator list, you may see performance issues when using infixNotation. See L{ParserElement.enablePackrat} for a mechanism to potentially improve your parser performance. @@ -5659,15 +5043,15 @@ def infixNotation(baseExpr, opList, lpar=Suppress("("), rpar=Suppress(")")): Example:: # simple example of four-function arithmetic with ints and variable names integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - + varname = pyparsing_common.identifier + arith_expr = infixNotation(integer | varname, [ ('-', 1, opAssoc.RIGHT), (oneOf('* /'), 2, opAssoc.LEFT), (oneOf('+ -'), 2, opAssoc.LEFT), ]) - + arith_expr.runTests(''' 5+3*6 (5+3)*6 @@ -5684,64 +5068,44 @@ def infixNotation(baseExpr, opList, lpar=Suppress("("), rpar=Suppress(")")): [[['-', 2], '-', ['-', 11]]] """ ret = Forward() - lastExpr = baseExpr | (lpar + ret + rpar) - for i, operDef in enumerate(opList): - opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] + lastExpr = baseExpr | ( lpar + ret + rpar ) + for i,operDef in enumerate(opList): + opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr if arity == 3: if opExpr is None or len(opExpr) != 2: - raise ValueError( - "if numterms=3, opExpr must be a tuple or list of two expressions" - ) + raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") opExpr1, opExpr2 = opExpr thisExpr = Forward().setName(termName) if rightLeftAssoc == opAssoc.LEFT: if arity == 1: - matchExpr = FollowedBy(lastExpr + opExpr) + Group( - lastExpr + OneOrMore(opExpr) - ) + matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) elif arity == 2: if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( - lastExpr + OneOrMore(opExpr + lastExpr) - ) + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) else: - matchExpr = FollowedBy(lastExpr + lastExpr) + Group( - lastExpr + OneOrMore(lastExpr) - ) + matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) elif arity == 3: - matchExpr = FollowedBy( - lastExpr + opExpr1 + lastExpr + 
opExpr2 + lastExpr - ) + Group(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ + Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) else: - raise ValueError( - "operator must be unary (1), binary (2), or ternary (3)" - ) + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") elif rightLeftAssoc == opAssoc.RIGHT: if arity == 1: # try to avoid LR with this extra test if not isinstance(opExpr, Optional): opExpr = Optional(opExpr) - matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( - opExpr + thisExpr - ) + matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) elif arity == 2: if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( - lastExpr + OneOrMore(opExpr + thisExpr) - ) + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) else: - matchExpr = FollowedBy(lastExpr + thisExpr) + Group( - lastExpr + OneOrMore(thisExpr) - ) + matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) elif arity == 3: - matchExpr = FollowedBy( - lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr - ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ + Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) else: - raise ValueError( - "operator must be unary (1), binary (2), or ternary (3)" - ) + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") else: raise ValueError("operator must indicate right or left associativity") if pa: @@ -5749,27 +5113,19 @@ def infixNotation(baseExpr, opList, lpar=Suppress("("), rpar=Suppress(")")): matchExpr.setParseAction(*pa) else: matchExpr.setParseAction(pa) - thisExpr <<= matchExpr.setName(termName) | lastExpr + thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) lastExpr = thisExpr ret <<= lastExpr return ret - operatorPrecedence = infixNotation """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release.""" -dblQuotedString = Combine( - Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' -).setName("string enclosed in double quotes") -sglQuotedString = Combine( - Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" -).setName("string enclosed in single quotes") -quotedString = Combine( - Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' - | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" -).setName("quotedString using single or double quotes") -unicodeString = Combine(_L("u") + quotedString.copy()).setName("unicode string literal") - +dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes") +sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes") +quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'| + Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes") +unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): """ @@ -5803,23 +5159,23 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop 
code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) - c_function = (decl_data_type("type") + c_function = (decl_data_type("type") + ident("name") - + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + code_body("body")) c_function.ignore(cStyleComment) - + source_code = ''' - int is_odd(int x) { - return (x%2); + int is_odd(int x) { + return (x%2); } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { + + int dec_to_hex(char hchar) { + if (hchar >= '0' && hchar <= '9') { + return (ord(hchar)-ord('0')); + } else { return (10+ord(hchar)-ord('A')); - } + } } ''' for func in c_function.searchString(source_code): @@ -5832,56 +5188,35 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: - if isinstance(opener, basestring) and isinstance(closer, basestring): - if len(opener) == 1 and len(closer) == 1: + if isinstance(opener,basestring) and isinstance(closer,basestring): + if len(opener) == 1 and len(closer)==1: if ignoreExpr is not None: - content = Combine( - OneOrMore( - ~ignoreExpr - + CharsNotIn( - opener + closer + ParserElement.DEFAULT_WHITE_CHARS, - exact=1, - ) - ) - ).setParseAction(lambda t: t[0].strip()) + content = (Combine(OneOrMore(~ignoreExpr + + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) else: - content = empty.copy() + CharsNotIn( - opener + closer + ParserElement.DEFAULT_WHITE_CHARS - ).setParseAction(lambda t: t[0].strip()) + content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t:t[0].strip())) else: if ignoreExpr is not None: - content = Combine( - OneOrMore( - ~ignoreExpr - + ~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) - ) - ).setParseAction(lambda t: t[0].strip()) + content = (Combine(OneOrMore(~ignoreExpr + + ~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) else: - content = Combine( - OneOrMore( - ~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) - ) - ).setParseAction(lambda t: t[0].strip()) + content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) else: - raise ValueError( - "opening and closing arguments must be strings if no content expression" - " is given" - ) + raise ValueError("opening and closing arguments must be strings if no content expression is given") ret = Forward() if ignoreExpr is not None: - ret <<= Group( - Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer) - ) + ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) else: - ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) - ret.setName("nested %s%s expression" % (opener, closer)) + ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) + ret.setName('nested %s%s expression' % (opener,closer)) return ret - def indentedBlock(blockStatementExpr, indentStack, indent=True): """ Helper method for defining space-delimited indentation blocks, such as @@ -5958,75 +5293,56 @@ def eggs(z): 
'spam', ['(', 'x', 'y', ')'], ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] + [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] """ - - def checkPeerIndent(s, l, t): - if l >= len(s): - return - curCol = col(l, s) + def checkPeerIndent(s,l,t): + if l >= len(s): return + curCol = col(l,s) if curCol != indentStack[-1]: if curCol > indentStack[-1]: - raise ParseFatalException(s, l, "illegal nesting") - raise ParseException(s, l, "not a peer entry") + raise ParseFatalException(s,l,"illegal nesting") + raise ParseException(s,l,"not a peer entry") - def checkSubIndent(s, l, t): - curCol = col(l, s) + def checkSubIndent(s,l,t): + curCol = col(l,s) if curCol > indentStack[-1]: - indentStack.append(curCol) + indentStack.append( curCol ) else: - raise ParseException(s, l, "not a subentry") - - def checkUnindent(s, l, t): - if l >= len(s): - return - curCol = col(l, s) - if not (indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): - raise ParseException(s, l, "not an unindent") + raise ParseException(s,l,"not a subentry") + + def checkUnindent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + raise ParseException(s,l,"not an unindent") indentStack.pop() NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) - INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName("INDENT") - PEER = Empty().setParseAction(checkPeerIndent).setName("") - UNDENT = Empty().setParseAction(checkUnindent).setName("UNINDENT") + INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') + PEER = Empty().setParseAction(checkPeerIndent).setName('') + UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') if indent: - smExpr = Group( - Optional(NL) - + - # ~ FollowedBy(blockStatementExpr) + - INDENT - + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))) - + UNDENT - ) + smExpr = Group( Optional(NL) + + #~ FollowedBy(blockStatementExpr) + + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) else: - smExpr = Group( - Optional(NL) + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))) - ) + smExpr = Group( Optional(NL) + + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.setName("indented block") - + return smExpr.setName('indented block') alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") -anyOpenTag, anyCloseTag = makeHTMLTags( - Word(alphas, alphanums + "_:").setName("any tag") -) -_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), "><& \"'")) -commonHTMLEntity = Regex( - "&(?P<entity>" + "|".join(_htmlEntityMap.keys()) + ");" -).setName("common HTML entity") - - +anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag')) +_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\'')) +commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") def replaceHTMLEntity(t): """Helper parser action to replace common HTML entities with their special characters""" return _htmlEntityMap.get(t.entity) - # it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
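As a quick illustration of the entity helpers reformatted in the hunk above, here is a minimal editor's sketch, not part of the patch; it assumes pyparsing 2.x:
# Editor's sketch (not part of the diff): decoding the entities in _htmlEntityMap.
from pyparsing import commonHTMLEntity, replaceHTMLEntity
# transformString substitutes each matched entity with the parse action's result
decoded = commonHTMLEntity.setParseAction(replaceHTMLEntity).transformString(
    "if x &lt; 3 &amp;&amp; y &gt; 0"
)
print(decoded)  # -> 'if x < 3 && y > 0'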
"Comment of the form C{/* ... */}" htmlComment = Regex(r"").setName("HTML comment") @@ -6036,9 +5352,7 @@ def replaceHTMLEntity(t): dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") "Comment of the form C{// ... (to end of line)}" -cppStyleComment = Combine( - Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment -).setName("C++ style comment") +cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" javaStyleComment = cppStyleComment @@ -6047,19 +5361,10 @@ def replaceHTMLEntity(t): pythonStyleComment = Regex(r"#.*").setName("Python style comment") "Comment of the form C{# ... (to end of line)}" -_commasepitem = ( - Combine( - OneOrMore( - Word(printables, excludeChars=",") - + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()) - ) - ) - .streamline() - .setName("commaItem") -) -commaSeparatedList = delimitedList( - Optional(quotedString.copy() | _commasepitem, default="") -).setName("commaSeparatedList") +_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + + Optional( Word(" \t") + + ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") +commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") """(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" @@ -6213,80 +5518,48 @@ class pyparsing_common: integer = Word(nums).setName("integer").setParseAction(convertToInteger) """expression that parses an unsigned integer, returns an int""" - hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16)) + hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) """expression that parses a hexadecimal integer, returns an int""" - signed_integer = ( - Regex(r"[+-]?\d+").setName("signed integer").setParseAction(convertToInteger) - ) + signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) """expression that parses an integer with optional leading sign, returns an int""" - fraction = ( - signed_integer().setParseAction(convertToFloat) - + "/" - + signed_integer().setParseAction(convertToFloat) - ).setName("fraction") + fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") """fractional expression of an integer divided by an integer, returns a float""" - fraction.addParseAction(lambda t: t[0] / t[-1]) + fraction.addParseAction(lambda t: t[0]/t[-1]) - mixed_integer = ( - fraction | signed_integer + Optional(Optional("-").suppress() + fraction) - ).setName("fraction or mixed integer-fraction") + mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" mixed_integer.addParseAction(sum) - real = Regex(r"[+-]?\d+\.\d*").setName("real number").setParseAction(convertToFloat) + real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) """expression that parses a floating point number and returns a float""" - sci_real = ( - Regex(r"[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)") - .setName("real number with scientific notation") - .setParseAction(convertToFloat) 
- ) + sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) """expression that parses a floating point number with optional scientific notation and returns a float""" # streamlining this expression makes the docs nicer-looking number = (sci_real | real | signed_integer).streamline() """any numeric expression, returns the corresponding Python type""" - fnumber = ( - Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?") - .setName("fnumber") - .setParseAction(convertToFloat) - ) + fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) """any int or real number, returned as float""" - - identifier = Word(alphas + "_", alphanums + "_").setName("identifier") + + identifier = Word(alphas+'_', alphanums+'_').setName("identifier") """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex( - r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}" - ).setName("IPv4 address") + + ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") "IPv4 address (C{0.0.0.0 - 255.255.255.255})" - _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").setName("hex_integer") - _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).setName( - "full IPv6 address" - ) - _short_ipv6_address = ( - Optional(_ipv6_part + (":" + _ipv6_part) * (0, 6)) - + "::" - + Optional(_ipv6_part + (":" + _ipv6_part) * (0, 6)) - ).setName("short IPv6 address") - _short_ipv6_address.addCondition( - lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8 - ) + _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") + _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") + _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") + _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") - ipv6_address = Combine( - (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName( - "IPv6 address" - ) - ).setName("IPv6 address") + ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") "IPv6 address (long, short, or mixed form)" - - mac_address = Regex( - r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}" - ).setName("MAC address") + + mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" @staticmethod @@ -6304,13 +5577,11 @@ def convertToDate(fmt="%Y-%m-%d"): prints:: [datetime.date(1999, 12, 31)] """ - - def cvt_fn(s, l, t): + def cvt_fn(s,l,t): try: return datetime.strptime(t[0], fmt).date() except ValueError as ve: raise ParseException(s, l, str(ve)) - return cvt_fn @staticmethod @@ -6328,61 +5599,41 @@ def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): prints:: [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] """ - - def cvt_fn(s, l, t): + def cvt_fn(s,l,t): try: return datetime.strptime(t[0], fmt) except ValueError as ve: raise ParseException(s, l, str(ve)) - return cvt_fn - iso8601_date = Regex( - r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
- ).setName("ISO8601 date") + iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") "ISO8601 date (C{yyyy-mm-dd})" - iso8601_datetime = Regex( - r"(?P\d{4})-(?P\d\d)-(?P\d\d)[T" - r" ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?" - ).setName("ISO8601 datetime") + iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" - uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").setName("UUID") + uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() - @staticmethod def stripHTMLTags(s, l, tokens): """ Parse action to remove HTML tags from web page HTML source Example:: - # strip HTML links from normal text + # strip HTML links from normal text text = 'More info at the
<a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>' td,td_end = makeHTMLTags("TD") table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - + print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' """ return pyparsing_common._html_stripper.transformString(tokens[0]) - _commasepitem = ( - Combine( - OneOrMore( - ~Literal(",") - + ~LineEnd() - + Word(printables, excludeChars=",") - + Optional(White(" \t")) - ) - ) - .streamline() - .setName("commaItem") - ) - comma_separated_list = delimitedList( - Optional(quotedString.copy() | _commasepitem, default="") - ).setName("comma separated list") + _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') + + Optional( White(" \t") ) ) ).streamline().setName("commaItem") + comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) @@ -6394,28 +5645,22 @@ def stripHTMLTags(s, l, tokens): if __name__ == "__main__": - selectToken = CaselessLiteral("select") - fromToken = CaselessLiteral("from") + selectToken = CaselessLiteral("select") + fromToken = CaselessLiteral("from") - ident = Word(alphas, alphanums + "_$") + ident = Word(alphas, alphanums + "_$") - columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) columnNameList = Group(delimitedList(columnName)).setName("columns") - columnSpec = "*" | columnNameList + columnSpec = ('*' | columnNameList) - tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - tableNameList = Group(delimitedList(tableName)).setName("tables") - - simpleSQL = ( - selectToken("command") - + columnSpec("columns") - + fromToken - + tableNameList("tables") - ) + tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + tableNameList = Group(delimitedList(tableName)).setName("tables") + + simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") # demo runTests method, including embedded comments in test string - simpleSQL.runTests( - """ # '*' as column list and dotted table name select * from SYS.XYZZY @@ -6437,44 +5682,34 @@ def stripHTMLTags(s, l, tokens): # invalid column name - should fail Select ^^^ frox Sys.dual - """ - ) + """) - pyparsing_common.number.runTests( - """ 100 -100 +100 3.14159 6.02e23 1e-12 - """ - ) + pyparsing_common.number.runTests(""" 100 -100 +100 3.14159 6.02e23 1e-12 """) # any int or real number, returned as float - pyparsing_common.fnumber.runTests( - """ 100 -100 +100 3.14159 6.02e23 1e-12 - """ - ) + pyparsing_common.fnumber.runTests(""" 100 -100 +100 3.14159 6.02e23 1e-12 """) - pyparsing_common.hex_integer.runTests( - """ 100 FF - """ - ) + pyparsing_common.hex_integer.runTests(""" 100 FF """) import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests( - """ 12345678-1234-5678-1234-567812345678 - """ - ) + pyparsing_common.uuid.runTests(""" 12345678-1234-5678-1234-567812345678 """) diff --git a/doc/tutorial/text_analytics/data/languages/fetch_data.py b/doc/tutorial/text_analytics/data/languages/fetch_data.py index 86f2a7c04f3ed..2dd0f208ade86 100644 --- a/doc/tutorial/text_analytics/data/languages/fetch_data.py +++ b/doc/tutorial/text_analytics/data/languages/fetch_data.py @@ -1,32 +1,34 @@ +
collect text paragraphs from various languages on the # same topic namely the Wikipedia encyclopedia itself -import codecs import os from urllib.request import Request, build_opener import lxml.html -import numpy as np from lxml.etree import ElementTree +import numpy as np + +import codecs pages = { - "ar": "http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7", # noqa: E501 - "de": "http://de.wikipedia.org/wiki/Wikipedia", - "en": "https://en.wikipedia.org/wiki/Wikipedia", - "es": "http://es.wikipedia.org/wiki/Wikipedia", - "fr": "http://fr.wikipedia.org/wiki/Wikip%C3%A9dia", - "it": "http://it.wikipedia.org/wiki/Wikipedia", - "ja": "http://ja.wikipedia.org/wiki/Wikipedia", - "nl": "http://nl.wikipedia.org/wiki/Wikipedia", - "pl": "http://pl.wikipedia.org/wiki/Wikipedia", - "pt": "http://pt.wikipedia.org/wiki/Wikip%C3%A9dia", - "ru": "http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F", # noqa: E501 - # u'zh': u'http://zh.wikipedia.org/wiki/Wikipedia', + 'ar': 'http://ar.wikipedia.org/wiki/%D9%88%D9%8A%D9%83%D9%8A%D8%A8%D9%8A%D8%AF%D9%8A%D8%A7', # noqa: E501 + 'de': 'http://de.wikipedia.org/wiki/Wikipedia', + 'en': 'https://en.wikipedia.org/wiki/Wikipedia', + 'es': 'http://es.wikipedia.org/wiki/Wikipedia', + 'fr': 'http://fr.wikipedia.org/wiki/Wikip%C3%A9dia', + 'it': 'http://it.wikipedia.org/wiki/Wikipedia', + 'ja': 'http://ja.wikipedia.org/wiki/Wikipedia', + 'nl': 'http://nl.wikipedia.org/wiki/Wikipedia', + 'pl': 'http://pl.wikipedia.org/wiki/Wikipedia', + 'pt': 'http://pt.wikipedia.org/wiki/Wikip%C3%A9dia', + 'ru': 'http://ru.wikipedia.org/wiki/%D0%92%D0%B8%D0%BA%D0%B8%D0%BF%D0%B5%D0%B4%D0%B8%D1%8F', # noqa: E501 +# u'zh': u'http://zh.wikipedia.org/wiki/Wikipedia', } -html_folder = "html" -text_folder = "paragraphs" -short_text_folder = "short_paragraphs" +html_folder = 'html' +text_folder = 'paragraphs' +short_text_folder = 'short_paragraphs' n_words_per_short_text = 5 @@ -44,41 +46,42 @@ os.makedirs(short_text_lang_folder) opener = build_opener() - html_filename = os.path.join(html_folder, lang + ".html") + html_filename = os.path.join(html_folder, lang + '.html') if not os.path.exists(html_filename): print("Downloading %s" % page) request = Request(page) # change the User Agent to avoid being blocked by Wikipedia # downloading a couple of articles should not be considered abusive - request.add_header("User-Agent", "OpenAnything/1.0") + request.add_header('User-Agent', 'OpenAnything/1.0') html_content = opener.open(request).read() - with open(html_filename, "wb") as f: + with open(html_filename, 'wb') as f: f.write(html_content) # decode the payload explicitly as UTF-8 since lxml is confused for some # reason - with codecs.open(html_filename, "r", "utf-8") as html_file: + with codecs.open(html_filename,'r','utf-8') as html_file: html_content = html_file.read() tree = ElementTree(lxml.html.document_fromstring(html_content)) i = 0 j = 0 - for p in tree.findall("//p"): + for p in tree.findall('//p'): content = p.text_content() if len(content) < 100: # skip paragraphs that are too short - probably too noisy and not # representative of the actual language continue - text_filename = os.path.join(text_lang_folder, "%s_%04d.txt" % (lang, i)) + text_filename = os.path.join(text_lang_folder, + '%s_%04d.txt' % (lang, i)) print("Writing %s" % text_filename) - with open(text_filename, "wb") as f: - f.write(content.encode("utf-8", "ignore")) + with open(text_filename, 'wb') as f: + f.write(content.encode('utf-8', 'ignore')) i += 1 # 
split the paragraph into fake smaller paragraphs to make the # problem harder e.g. more similar to tweets - if lang in ("zh", "ja"): - # FIXME: whitespace tokenizing does not work on chinese and japanese + if lang in ('zh', 'ja'): + # FIXME: whitespace tokenizing does not work on chinese and japanese continue words = content.split() n_groups = len(words) / n_words_per_short_text @@ -89,12 +92,12 @@ for group in groups: small_content = " ".join(group) - short_text_filename = os.path.join( - short_text_lang_folder, "%s_%04d.txt" % (lang, j) - ) + short_text_filename = os.path.join(short_text_lang_folder, + '%s_%04d.txt' % (lang, j)) print("Writing %s" % short_text_filename) - with open(short_text_filename, "wb") as f: - f.write(small_content.encode("utf-8", "ignore")) + with open(short_text_filename, 'wb') as f: + f.write(small_content.encode('utf-8', 'ignore')) j += 1 if j >= 1000: break + diff --git a/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py b/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py index e74dfc621ebda..e591aca0f241b 100644 --- a/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py +++ b/doc/tutorial/text_analytics/data/movie_reviews/fetch_data.py @@ -5,9 +5,11 @@ from contextlib import closing from urllib.request import urlopen -URL = "http://www.cs.cornell.edu/people/pabo/movie-review-data/review_polarity.tar.gz" -ARCHIVE_NAME = URL.rsplit("/", 1)[1] +URL = ("http://www.cs.cornell.edu/people/pabo/" + "movie-review-data/review_polarity.tar.gz") + +ARCHIVE_NAME = URL.rsplit('/', 1)[1] DATA_FOLDER = "txt_sentoken" @@ -16,10 +18,10 @@ if not os.path.exists(ARCHIVE_NAME): print("Downloading dataset from %s (3 MB)" % URL) opener = urlopen(URL) - with open(ARCHIVE_NAME, "wb") as archive: + with open(ARCHIVE_NAME, 'wb') as archive: archive.write(opener.read()) print("Decompressing %s" % ARCHIVE_NAME) with closing(tarfile.open(ARCHIVE_NAME, "r:gz")) as archive: - archive.extractall(path=".") + archive.extractall(path='.') os.remove(ARCHIVE_NAME) diff --git a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py index e0b028f52d342..438481120d126 100644 --- a/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py +++ b/doc/tutorial/text_analytics/skeletons/exercise_01_language_train_model.py @@ -11,12 +11,13 @@ import sys -from sklearn import metrics +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import Perceptron +from sklearn.pipeline import Pipeline from sklearn.datasets import load_files -from sklearn.feature_extraction.text import TfidfVectorizer # noqa -from sklearn.linear_model import Perceptron # noqa from sklearn.model_selection import train_test_split -from sklearn.pipeline import Pipeline # noqa +from sklearn import metrics + # The training data folder must be passed as first argument languages_data_folder = sys.argv[1] @@ -24,8 +25,7 @@ # Split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.5 -) + dataset.data, dataset.target, test_size=0.5) # TASK: Build a vectorizer that splits strings into sequence of 1 to 3 @@ -39,27 +39,24 @@ # TASK: Predict the outcome on the testing set in a variable named y_predicted # Print the classification report -print( - metrics.classification_report( - y_test, y_predicted, target_names=dataset.target_names # noqa - ) -) 
+print(metrics.classification_report(y_test, y_predicted, + target_names=dataset.target_names)) # Plot the confusion matrix -cm = metrics.confusion_matrix(y_test, y_predicted) # noqa +cm = metrics.confusion_matrix(y_test, y_predicted) print(cm) -# import matplotlib.pyplot as plt -# plt.matshow(cm, cmap=plt.cm.jet) -# plt.show() +#import matplotlib.pyplot as plt +#plt.matshow(cm, cmap=plt.cm.jet) +#plt.show() # Predict the result on some short new sentences: sentences = [ - "This is a language detection test.", - "Ceci est un test de d\xe9tection de la langue.", - "Dies ist ein Test, um die Sprache zu erkennen.", + 'This is a language detection test.', + 'Ceci est un test de d\xe9tection de la langue.', + 'Dies ist ein Test, um die Sprache zu erkennen.', ] -predicted = clf.predict(sentences) # noqa +predicted = clf.predict(sentences) for s, p in zip(sentences, predicted): print('The language of "%s" is "%s"' % (s, dataset.target_names[p])) diff --git a/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py b/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py index afefcac4a31bb..23299f5f01b3d 100644 --- a/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py +++ b/doc/tutorial/text_analytics/skeletons/exercise_02_sentiment.py @@ -12,13 +12,14 @@ # License: Simplified BSD import sys - -from sklearn import metrics +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.svm import LinearSVC +from sklearn.pipeline import Pipeline +from sklearn.model_selection import GridSearchCV from sklearn.datasets import load_files -from sklearn.feature_extraction.text import TfidfVectorizer # noqa -from sklearn.model_selection import GridSearchCV, train_test_split # noqa -from sklearn.pipeline import Pipeline # noqa -from sklearn.svm import LinearSVC # noqa +from sklearn.model_selection import train_test_split +from sklearn import metrics + if __name__ == "__main__": # NOTE: we put the following in a 'if __name__ == "__main__"' protected @@ -34,8 +35,7 @@ # split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.25, random_state=None - ) + dataset.data, dataset.target, test_size=0.25, random_state=None) # TASK: Build a vectorizer / classifier pipeline that filters out tokens # that are too rare or too frequent @@ -51,14 +51,11 @@ # named y_predicted # Print the classification report - print( - metrics.classification_report( - y_test, y_predicted, target_names=dataset.target_names # noqa - ) - ) + print(metrics.classification_report(y_test, y_predicted, + target_names=dataset.target_names)) # Print and plot the confusion matrix - cm = metrics.confusion_matrix(y_test, y_predicted) # noqa + cm = metrics.confusion_matrix(y_test, y_predicted) print(cm) # import matplotlib.pyplot as plt diff --git a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py index 5ab6ae73afa10..21cee0c80e00e 100644 --- a/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py +++ b/doc/tutorial/text_analytics/solutions/exercise_01_language_train_model.py @@ -11,12 +11,13 @@ import sys -from sklearn import metrics -from sklearn.datasets import load_files from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.linear_model import Perceptron -from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline +from sklearn.datasets import load_files 
+from sklearn.model_selection import train_test_split +from sklearn import metrics + # The training data folder must be passed as first argument languages_data_folder = sys.argv[1] @@ -24,22 +25,20 @@ # Split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.5 -) + dataset.data, dataset.target, test_size=0.5) # TASK: Build a vectorizer that splits strings into sequence of 1 to 3 # characters instead of word tokens -vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer="char", use_idf=False) +vectorizer = TfidfVectorizer(ngram_range=(1, 3), analyzer='char', + use_idf=False) # TASK: Build a vectorizer / classifier pipeline using the previous analyzer # the pipeline instance should stored in a variable named clf -clf = Pipeline( - [ - ("vec", vectorizer), - ("clf", Perceptron()), - ] -) +clf = Pipeline([ + ('vec', vectorizer), + ('clf', Perceptron()), +]) # TASK: Fit the pipeline on the training set clf.fit(docs_train, y_train) @@ -48,25 +47,22 @@ y_predicted = clf.predict(docs_test) # Print the classification report -print( - metrics.classification_report( - y_test, y_predicted, target_names=dataset.target_names - ) -) +print(metrics.classification_report(y_test, y_predicted, + target_names=dataset.target_names)) # Plot the confusion matrix cm = metrics.confusion_matrix(y_test, y_predicted) print(cm) -# import matlotlib.pyplot as plt -# plt.matshow(cm, cmap=plt.cm.jet) -# plt.show() +#import matlotlib.pyplot as plt +#plt.matshow(cm, cmap=plt.cm.jet) +#plt.show() # Predict the result on some short new sentences: sentences = [ - "This is a language detection test.", - "Ceci est un test de d\xe9tection de la langue.", - "Dies ist ein Test, um die Sprache zu erkennen.", + 'This is a language detection test.', + 'Ceci est un test de d\xe9tection de la langue.', + 'Dies ist ein Test, um die Sprache zu erkennen.', ] predicted = clf.predict(sentences) diff --git a/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py b/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py index 013753c57b6b7..434bece341975 100644 --- a/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py +++ b/doc/tutorial/text_analytics/solutions/exercise_02_sentiment.py @@ -12,13 +12,14 @@ # License: Simplified BSD import sys - -from sklearn import metrics -from sklearn.datasets import load_files from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC +from sklearn.pipeline import Pipeline +from sklearn.model_selection import GridSearchCV +from sklearn.datasets import load_files +from sklearn.model_selection import train_test_split +from sklearn import metrics + if __name__ == "__main__": # NOTE: we put the following in a 'if __name__ == "__main__"' protected @@ -34,51 +35,40 @@ # split the dataset in training and test set: docs_train, docs_test, y_train, y_test = train_test_split( - dataset.data, dataset.target, test_size=0.25, random_state=None - ) + dataset.data, dataset.target, test_size=0.25, random_state=None) # TASK: Build a vectorizer / classifier pipeline that filters out tokens # that are too rare or too frequent - pipeline = Pipeline( - [ - ("vect", TfidfVectorizer(min_df=3, max_df=0.95)), - ("clf", LinearSVC(C=1000)), - ] - ) + pipeline = Pipeline([ + ('vect', TfidfVectorizer(min_df=3, max_df=0.95)), + ('clf', LinearSVC(C=1000)), + ]) # TASK: Build a grid 
search to find out whether unigrams or bigrams are # more useful. # Fit the pipeline on the training set using grid search for the parameters parameters = { - "vect__ngram_range": [(1, 1), (1, 2)], + 'vect__ngram_range': [(1, 1), (1, 2)], } grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1) grid_search.fit(docs_train, y_train) # TASK: print the mean and std for each candidate along with the parameter # settings for all the candidates explored by grid search. - n_candidates = len(grid_search.cv_results_["params"]) + n_candidates = len(grid_search.cv_results_['params']) for i in range(n_candidates): - print( - i, - "params - %s; mean - %0.2f; std - %0.2f" - % ( - grid_search.cv_results_["params"][i], - grid_search.cv_results_["mean_test_score"][i], - grid_search.cv_results_["std_test_score"][i], - ), - ) + print(i, 'params - %s; mean - %0.2f; std - %0.2f' + % (grid_search.cv_results_['params'][i], + grid_search.cv_results_['mean_test_score'][i], + grid_search.cv_results_['std_test_score'][i])) # TASK: Predict the outcome on the testing set and store it in a variable # named y_predicted y_predicted = grid_search.predict(docs_test) # Print the classification report - print( - metrics.classification_report( - y_test, y_predicted, target_names=dataset.target_names - ) - ) + print(metrics.classification_report(y_test, y_predicted, + target_names=dataset.target_names)) # Print and plot the confusion matrix cm = metrics.confusion_matrix(y_test, y_predicted) diff --git a/examples/applications/plot_cyclical_feature_engineering.py b/examples/applications/plot_cyclical_feature_engineering.py index 9671f791153fc..10ab666ab277e 100644 --- a/examples/applications/plot_cyclical_feature_engineering.py +++ b/examples/applications/plot_cyclical_feature_engineering.py @@ -35,6 +35,7 @@ # demand around the middle of the days: import matplotlib.pyplot as plt + fig, ax = plt.subplots(figsize=(12, 4)) average_week_demand = df.groupby(["weekday", "hour"]).mean()["count"] average_week_demand.plot(ax=ax) @@ -160,10 +161,6 @@ # %% X.iloc[train_4] -from sklearn.compose import ColumnTransformer -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.model_selection import cross_validate - # %% # All is well. We are now ready to do some predictive modeling! # @@ -186,6 +183,10 @@ # we only try the default hyper-parameters for this model: from sklearn.pipeline import make_pipeline from sklearn.preprocessing import OrdinalEncoder +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import HistGradientBoostingRegressor +from sklearn.model_selection import cross_validate + categorical_columns = [ "weather", @@ -238,10 +239,6 @@ def evaluate(model, X, y, cv): evaluate(gbrt_pipeline, X, y, cv=ts_cv) -import numpy as np - -from sklearn.linear_model import RidgeCV - # %% # This model has an average error around 4 to 5% of the maximum demand. This is # quite good for a first trial without any hyper-parameter tuning! 
We just had @@ -260,7 +257,11 @@ def evaluate(model, X, y, cv): # For consistency, we scale the numerical features to the same 0-1 range using # class:`sklearn.preprocessing.MinMaxScaler`, although in this case it does not # impact the results much because they are already on comparable scales: -from sklearn.preprocessing import MinMaxScaler, OneHotEncoder +from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import MinMaxScaler +from sklearn.linear_model import RidgeCV +import numpy as np + one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse=False) alphas = np.logspace(-6, 6, 25) @@ -601,8 +602,6 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # "workingday" and features derived from "hours". This issue will be addressed # in the following section. -from sklearn.pipeline import FeatureUnion - # %% # Modeling pairwise interactions with splines and polynomial features # ------------------------------------------------------------------- @@ -616,6 +615,8 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # grained spline encoded hours to model the "workingday"/"hours" interaction # explicitly without introducing too many new variables: from sklearn.preprocessing import PolynomialFeatures +from sklearn.pipeline import FeatureUnion + hour_workday_interaction = make_pipeline( ColumnTransformer( @@ -662,6 +663,7 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # polynomial kernel expansion. Let us try the latter: from sklearn.kernel_approximation import Nystroem + cyclic_spline_poly_pipeline = make_pipeline( cyclic_spline_transformer, Nystroem(kernel="poly", degree=2, n_components=300, random_state=0), diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py index 93af1a4f5f89f..84702034152f5 100644 --- a/examples/applications/plot_digits_denoising.py +++ b/examples/applications/plot_digits_denoising.py @@ -32,10 +32,9 @@ # :func:`~sklearn.datasets.fetch_openml` to get this dataset. In addition, we # normalize the dataset such that all pixel values are in the range (0, 1). 
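Aside on the pattern the cyclical-feature-engineering hunks above reorder imports for: a linear baseline that one-hot encodes the categorical columns, scales the numeric ones to the 0-1 range, and fits a RidgeCV over a logspace alpha grid. A minimal, self-contained sketch of that pattern follows; the toy columns and data are invented for illustration, and only the alpha grid and the `sparse=False` encoder flag mirror the example itself.

import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.linear_model import RidgeCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

rng = np.random.RandomState(0)
# Hypothetical columns, standing in for the example's bike-demand features.
X = pd.DataFrame(
    {
        "weather": rng.choice(["clear", "rain", "misty"], size=200),
        "hour": rng.randint(0, 24, size=200),
    }
)
y = rng.rand(200)

preprocessor = ColumnTransformer(
    [
        ("categorical", OneHotEncoder(handle_unknown="ignore", sparse=False), ["weather"]),
        ("numerical", MinMaxScaler(), ["hour"]),
    ]
)
model = make_pipeline(preprocessor, RidgeCV(alphas=np.logspace(-6, 6, 25)))
model.fit(X, y)
print("training R^2: %.3f" % model.score(X, y))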
import numpy as np - from sklearn.datasets import fetch_openml -from sklearn.model_selection import train_test_split from sklearn.preprocessing import MinMaxScaler +from sklearn.model_selection import train_test_split X, y = fetch_openml(data_id=41082, as_frame=False, return_X_y=True, parser="pandas") X = MinMaxScaler().fit_transform(X) diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py index 34f40da368d03..069f0f5aad202 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -13,17 +13,19 @@ """ # %% from time import time - import matplotlib.pyplot as plt +from sklearn.model_selection import train_test_split +from sklearn.model_selection import RandomizedSearchCV from sklearn.datasets import fetch_lfw_people -from sklearn.decomposition import PCA -from sklearn.metrics import ConfusionMatrixDisplay, classification_report -from sklearn.model_selection import RandomizedSearchCV, train_test_split +from sklearn.metrics import classification_report +from sklearn.metrics import ConfusionMatrixDisplay from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA from sklearn.svm import SVC from sklearn.utils.fixes import loguniform + # %% # Download the data, if not already on disk and load it as numpy arrays diff --git a/examples/applications/plot_model_complexity_influence.py b/examples/applications/plot_model_complexity_influence.py index d96e88657e4c8..d05f4ab497ada 100644 --- a/examples/applications/plot_model_complexity_influence.py +++ b/examples/applications/plot_model_complexity_influence.py @@ -38,16 +38,16 @@ # License: BSD 3 clause import time - -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn import datasets -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.linear_model import SGDClassifier -from sklearn.metrics import hamming_loss, mean_squared_error from sklearn.model_selection import train_test_split +from sklearn.metrics import mean_squared_error from sklearn.svm import NuSVR +from sklearn.ensemble import GradientBoostingRegressor +from sklearn.linear_model import SGDClassifier +from sklearn.metrics import hamming_loss # Initialize random generator np.random.seed(0) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index 8468c1440800a..a8e4f9b72a3b0 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -18,23 +18,25 @@ # @FedericoV # License: BSD 3 clause +from glob import glob import itertools import os.path import re -import sys import tarfile import time -from glob import glob -from html.parser import HTMLParser -from urllib.request import urlretrieve +import sys -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib import rcParams +from html.parser import HTMLParser +from urllib.request import urlretrieve from sklearn.datasets import get_data_home from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.linear_model import PassiveAggressiveClassifier, Perceptron, SGDClassifier +from sklearn.linear_model import SGDClassifier +from sklearn.linear_model import PassiveAggressiveClassifier +from sklearn.linear_model import Perceptron from sklearn.naive_bayes import MultinomialNB diff --git a/examples/applications/plot_outlier_detection_wine.py 
b/examples/applications/plot_outlier_detection_wine.py index c4adfa222a5dd..45e4c64d9fcc4 100644 --- a/examples/applications/plot_outlier_detection_wine.py +++ b/examples/applications/plot_outlier_detection_wine.py @@ -37,13 +37,12 @@ # Author: Virgile Fritsch # License: BSD 3 clause -import matplotlib.font_manager -import matplotlib.pyplot as plt import numpy as np - from sklearn.covariance import EllipticEnvelope -from sklearn.datasets import load_wine from sklearn.svm import OneClassSVM +import matplotlib.pyplot as plt +import matplotlib.font_manager +from sklearn.datasets import load_wine # Define "classifiers" to be used classifiers = { diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index 8fce81fb9fb4e..9b99bcbfdfaf1 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -16,18 +16,19 @@ # Authors: Eustache Diemert # License: BSD 3 clause -import gc -import time from collections import defaultdict -import matplotlib.pyplot as plt +import time +import gc import numpy as np +import matplotlib.pyplot as plt +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split from sklearn.datasets import make_regression from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import Ridge, SGDRegressor -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler +from sklearn.linear_model import Ridge +from sklearn.linear_model import SGDRegressor from sklearn.svm import SVR from sklearn.utils import shuffle diff --git a/examples/applications/plot_species_distribution_modeling.py b/examples/applications/plot_species_distribution_modeling.py index cbf03e04efd23..3f932c3f6562c 100644 --- a/examples/applications/plot_species_distribution_modeling.py +++ b/examples/applications/plot_species_distribution_modeling.py @@ -43,12 +43,12 @@ from time import time -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn import metrics, svm -from sklearn.datasets import fetch_species_distributions from sklearn.utils import Bunch +from sklearn.datasets import fetch_species_distributions +from sklearn import svm, metrics # if basemap is available, we'll use it. # otherwise, we'll improvise later... diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index f295a9d123572..39708be5ef3e0 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -23,7 +23,6 @@ # alphavantage.co . 
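The wine outlier-detection hunk above only touches import order, but the two detectors it compares can be exercised in a few lines. A minimal sketch on synthetic data; the `contamination`, `nu`, and `gamma` values here are illustrative, not the example's.

import numpy as np

from sklearn.covariance import EllipticEnvelope
from sklearn.svm import OneClassSVM

rng = np.random.RandomState(42)
X = rng.randn(200, 2)
X[:10] += 6  # shift a few points far from the bulk to act as outliers

detectors = {
    "EllipticEnvelope": EllipticEnvelope(contamination=0.05),
    "OneClassSVM": OneClassSVM(nu=0.05, gamma=0.1),
}
for name, detector in detectors.items():
    labels = detector.fit(X).predict(X)  # +1 = inlier, -1 = outlier
    print("%s flagged %d outliers" % (name, (labels == -1).sum()))

Both estimators share the fit/predict convention, which is why the example can loop over them interchangeably.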
import sys - import numpy as np import pandas as pd diff --git a/examples/applications/plot_tomography_l1_reconstruction.py b/examples/applications/plot_tomography_l1_reconstruction.py index d851613402571..9ac351c12206c 100644 --- a/examples/applications/plot_tomography_l1_reconstruction.py +++ b/examples/applications/plot_tomography_l1_reconstruction.py @@ -39,11 +39,12 @@ class :class:`~sklearn.linear_model.Lasso`, that uses the coordinate descent # Author: Emmanuelle Gouillart # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np -from scipy import ndimage, sparse - -from sklearn.linear_model import Lasso, Ridge +from scipy import sparse +from scipy import ndimage +from sklearn.linear_model import Lasso +from sklearn.linear_model import Ridge +import matplotlib.pyplot as plt def _weights(x, dx=1, orig=0): diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index 0385fd7c89333..38945241ab68b 100644 --- a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -27,12 +27,11 @@ # License: BSD 3 clause from time import time - import matplotlib.pyplot as plt +from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer +from sklearn.decomposition import NMF, MiniBatchNMF, LatentDirichletAllocation from sklearn.datasets import fetch_20newsgroups -from sklearn.decomposition import NMF, LatentDirichletAllocation, MiniBatchNMF -from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer n_samples = 2000 n_features = 1000 diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py index fbbba0dd2344f..c8019fa72ae91 100644 --- a/examples/applications/svm_gui.py +++ b/examples/applications/svm_gui.py @@ -30,13 +30,12 @@ from matplotlib.backends.backend_tkagg import ( NavigationToolbar2TkAgg as NavigationToolbar2Tk, ) +from matplotlib.figure import Figure +from matplotlib.contour import ContourSet import sys -import tkinter as Tk - import numpy as np -from matplotlib.contour import ContourSet -from matplotlib.figure import Figure +import tkinter as Tk from sklearn import svm from sklearn.datasets import dump_svmlight_file diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index 0be1661d7ed5c..fcc337b0a4e00 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -33,17 +33,19 @@ # Author: Olivier Grisel # License: BSD 3 clause -import os from bz2 import BZ2File +import os from datetime import datetime from pprint import pprint from time import time -from urllib.request import urlopen import numpy as np + from scipy import sparse from sklearn.decomposition import randomized_svd +from urllib.request import urlopen + # %% # Download data, if not already on disk diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py index 944ebe270d1a0..615a3d1495eb8 100644 --- a/examples/bicluster/plot_bicluster_newsgroups.py +++ b/examples/bicluster/plot_bicluster_newsgroups.py @@ -23,13 +23,14 @@ """ -import operator from collections import defaultdict +import operator from time import time import numpy as np -from sklearn.cluster import MiniBatchKMeans, SpectralCoclustering +from sklearn.cluster import SpectralCoclustering +from sklearn.cluster import MiniBatchKMeans from 
sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.cluster import v_measure_score diff --git a/examples/bicluster/plot_spectral_biclustering.py b/examples/bicluster/plot_spectral_biclustering.py index 609697bc44d37..3a0af07815c02 100644 --- a/examples/bicluster/plot_spectral_biclustering.py +++ b/examples/bicluster/plot_spectral_biclustering.py @@ -22,10 +22,11 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.cluster import SpectralBiclustering from sklearn.datasets import make_checkerboard +from sklearn.cluster import SpectralBiclustering from sklearn.metrics import consensus_score + n_clusters = (4, 3) data, rows, columns = make_checkerboard( shape=(300, 300), n_clusters=n_clusters, noise=10, shuffle=False, random_state=0 diff --git a/examples/bicluster/plot_spectral_coclustering.py b/examples/bicluster/plot_spectral_coclustering.py index 92b10d93956e7..0df275e83e3bd 100644 --- a/examples/bicluster/plot_spectral_coclustering.py +++ b/examples/bicluster/plot_spectral_coclustering.py @@ -21,8 +21,8 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.cluster import SpectralCoclustering from sklearn.datasets import make_biclusters +from sklearn.cluster import SpectralCoclustering from sklearn.metrics import consensus_score data, rows, columns = make_biclusters( diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index 96deffe4fca6a..75d1ea15b8fbd 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -88,12 +88,11 @@ clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test) print("With sigmoid calibration: %1.3f" % clf_sigmoid_score) -import matplotlib.pyplot as plt - # %% # Plot data and the predicted probabilities # ----------------------------------------- from matplotlib import cm +import matplotlib.pyplot as plt plt.figure() y_unique = np.unique(y) diff --git a/examples/calibration/plot_calibration_curve.py b/examples/calibration/plot_calibration_curve.py index 978d3154183fa..bd36d7e4a654b 100644 --- a/examples/calibration/plot_calibration_curve.py +++ b/examples/calibration/plot_calibration_curve.py @@ -140,11 +140,11 @@ import pandas as pd from sklearn.metrics import ( - brier_score_loss, - f1_score, - log_loss, precision_score, recall_score, + f1_score, + brier_score_loss, + log_loss, roc_auc_score, ) diff --git a/examples/calibration/plot_calibration_multiclass.py b/examples/calibration/plot_calibration_multiclass.py index fc6349f3dea5f..24962a786ea03 100644 --- a/examples/calibration/plot_calibration_multiclass.py +++ b/examples/calibration/plot_calibration_multiclass.py @@ -31,7 +31,6 @@ class of an instance (red: class 1, green: class 2, blue: class 3). # License: BSD Style. 
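For context on the calibration hunks above: the workflow they reorder imports for fits an uncalibrated classifier next to a sigmoid-calibrated one and compares Brier scores. A minimal sketch under simplified, invented settings (the blob parameters and cv value are placeholders, not the example's, and the sample weights the example uses are omitted):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_blobs
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X, y = make_blobs(n_samples=2000, centers=2, cluster_std=5.0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

clf = GaussianNB().fit(X_train, y_train)
clf_sigmoid = CalibratedClassifierCV(GaussianNB(), cv=3, method="sigmoid")
clf_sigmoid.fit(X_train, y_train)

for name, model in [("No calibration", clf), ("Sigmoid calibration", clf_sigmoid)]:
    prob_pos = model.predict_proba(X_test)[:, 1]  # probability of the positive class
    print("%s: Brier score %.3f" % (name, brier_score_loss(y_test, prob_pos)))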
import numpy as np - from sklearn.datasets import make_blobs np.random.seed(0) diff --git a/examples/classification/plot_classification_probability.py b/examples/classification/plot_classification_probability.py index ec5887b63914d..87c3f51db5eb2 100644 --- a/examples/classification/plot_classification_probability.py +++ b/examples/classification/plot_classification_probability.py @@ -23,12 +23,12 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn import datasets -from sklearn.gaussian_process import GaussianProcessClassifier -from sklearn.gaussian_process.kernels import RBF -from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score +from sklearn.linear_model import LogisticRegression from sklearn.svm import SVC +from sklearn.gaussian_process import GaussianProcessClassifier +from sklearn.gaussian_process.kernels import RBF +from sklearn import datasets iris = datasets.load_iris() X = iris.data[:, 0:2] # we only take the first two features for visualization diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py index 2cb17726131b5..e4c52d9e2564a 100644 --- a/examples/classification/plot_classifier_comparison.py +++ b/examples/classification/plot_classifier_comparison.py @@ -25,23 +25,22 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap - -from sklearn.datasets import make_circles, make_classification, make_moons -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis -from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier -from sklearn.gaussian_process import GaussianProcessClassifier -from sklearn.gaussian_process.kernels import RBF -from sklearn.inspection import DecisionBoundaryDisplay from sklearn.model_selection import train_test_split -from sklearn.naive_bayes import GaussianNB -from sklearn.neighbors import KNeighborsClassifier -from sklearn.neural_network import MLPClassifier from sklearn.preprocessing import StandardScaler +from sklearn.datasets import make_moons, make_circles, make_classification +from sklearn.neural_network import MLPClassifier +from sklearn.neighbors import KNeighborsClassifier from sklearn.svm import SVC +from sklearn.gaussian_process import GaussianProcessClassifier +from sklearn.gaussian_process.kernels import RBF from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier +from sklearn.naive_bayes import GaussianNB +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +from sklearn.inspection import DecisionBoundaryDisplay names = [ "Nearest Neighbors", diff --git a/examples/classification/plot_digits_classification.py b/examples/classification/plot_digits_classification.py index 964f93e3b1e62..385bc865cd48b 100644 --- a/examples/classification/plot_digits_classification.py +++ b/examples/classification/plot_digits_classification.py @@ -15,7 +15,7 @@ import matplotlib.pyplot as plt # Import datasets, classifiers and performance metrics -from sklearn import datasets, metrics, svm +from sklearn import datasets, svm, metrics from sklearn.model_selection import train_test_split ############################################################################### diff --git a/examples/classification/plot_lda.py b/examples/classification/plot_lda.py index aec7565e39174..47487fc1f2caf 
100644 --- a/examples/classification/plot_lda.py +++ b/examples/classification/plot_lda.py @@ -8,12 +8,13 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn.covariance import OAS from sklearn.datasets import make_blobs from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.covariance import OAS + n_train = 20 # samples for training n_test = 200 # samples for testing diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py index 4093edea3400a..785a8627c8ca7 100644 --- a/examples/classification/plot_lda_qda.py +++ b/examples/classification/plot_lda_qda.py @@ -15,8 +15,8 @@ class has its own standard deviation with QDA. # Colormap # -------- -import matplotlib as mpl import matplotlib.pyplot as plt +import matplotlib as mpl from matplotlib import colors cmap = colors.LinearSegmentedColormap( @@ -172,10 +172,8 @@ def plot_qda_cov(qda, splot): fontsize=15, ) -from sklearn.discriminant_analysis import ( - LinearDiscriminantAnalysis, - QuadraticDiscriminantAnalysis, -) +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): # Linear Discriminant Analysis diff --git a/examples/cluster/plot_adjusted_for_chance_measures.py b/examples/cluster/plot_adjusted_for_chance_measures.py index 19f5efa0cc693..7985eb70e6a0b 100644 --- a/examples/cluster/plot_adjusted_for_chance_measures.py +++ b/examples/cluster/plot_adjusted_for_chance_measures.py @@ -24,11 +24,9 @@ # Author: Olivier Grisel # License: BSD 3 clause -from time import time - -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt +from time import time from sklearn import metrics diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py index 335dfafb39b28..3007b0e6539a2 100644 --- a/examples/cluster/plot_affinity_propagation.py +++ b/examples/cluster/plot_affinity_propagation.py @@ -9,8 +9,8 @@ """ -from sklearn import metrics from sklearn.cluster import AffinityPropagation +from sklearn import metrics from sklearn.datasets import make_blobs # %% @@ -44,12 +44,11 @@ % metrics.silhouette_score(X, labels, metric="sqeuclidean") ) -from itertools import cycle - # %% # Plot result # ----------- import matplotlib.pyplot as plt +from itertools import cycle plt.close("all") plt.figure(1) diff --git a/examples/cluster/plot_agglomerative_clustering.py b/examples/cluster/plot_agglomerative_clustering.py index 1c844bd0a7c62..9d590f572f121 100644 --- a/examples/cluster/plot_agglomerative_clustering.py +++ b/examples/cluster/plot_agglomerative_clustering.py @@ -28,7 +28,6 @@ # License: BSD 3 clause import time - import matplotlib.pyplot as plt import numpy as np diff --git a/examples/cluster/plot_agglomerative_dendrogram.py b/examples/cluster/plot_agglomerative_dendrogram.py index 20c22f4f0bb39..2de5030d68f6d 100644 --- a/examples/cluster/plot_agglomerative_dendrogram.py +++ b/examples/cluster/plot_agglomerative_dendrogram.py @@ -10,11 +10,11 @@ """ import numpy as np + from matplotlib import pyplot as plt from scipy.cluster.hierarchy import dendrogram - -from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import load_iris +from sklearn.cluster import AgglomerativeClustering def plot_dendrogram(model, **kwargs): diff --git a/examples/cluster/plot_birch_vs_minibatchkmeans.py 
b/examples/cluster/plot_birch_vs_minibatchkmeans.py index 55502f446c146..3d4185dc9368a 100644 --- a/examples/cluster/plot_birch_vs_minibatchkmeans.py +++ b/examples/cluster/plot_birch_vs_minibatchkmeans.py @@ -25,17 +25,17 @@ # Alexandre Gramfort # License: BSD 3 clause +from joblib import cpu_count from itertools import cycle from time import time - -import matplotlib.colors as colors -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt +import matplotlib.colors as colors -from joblib import cpu_count from sklearn.cluster import Birch, MiniBatchKMeans from sklearn.datasets import make_blobs + # Generate centers for the blobs so that it forms a 10 X 10 grid. xx = np.linspace(-22, 22, 10) yy = np.linspace(-22, 22, 10) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index 095c533cb2aec..818f1cc612c2f 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -13,8 +13,9 @@ """ import matplotlib.pyplot as plt -from sklearn.cluster import BisectingKMeans, KMeans from sklearn.datasets import make_blobs +from sklearn.cluster import BisectingKMeans, KMeans + print(__doc__) diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py index 92d5bd6bcdba9..8b52759c79018 100644 --- a/examples/cluster/plot_cluster_comparison.py +++ b/examples/cluster/plot_cluster_comparison.py @@ -26,14 +26,14 @@ import time import warnings -from itertools import cycle, islice -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn import cluster, datasets, mixture from sklearn.neighbors import kneighbors_graph from sklearn.preprocessing import StandardScaler +from itertools import cycle, islice np.random.seed(0) diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py index 91d3a7317398d..e089e7bdd609c 100644 --- a/examples/cluster/plot_cluster_iris.py +++ b/examples/cluster/plot_cluster_iris.py @@ -19,15 +19,15 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause +import numpy as np import matplotlib.pyplot as plt # Though the following import is not directly being used, it is required # for 3D projection to work with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 -import numpy as np -from sklearn import datasets from sklearn.cluster import KMeans +from sklearn import datasets np.random.seed(5) diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py index aadcfe9faac73..229ca182c1e65 100644 --- a/examples/cluster/plot_coin_segmentation.py +++ b/examples/cluster/plot_coin_segmentation.py @@ -27,14 +27,15 @@ import time -import matplotlib.pyplot as plt import numpy as np from scipy.ndimage import gaussian_filter +import matplotlib.pyplot as plt from skimage.data import coins from skimage.transform import rescale -from sklearn.cluster import spectral_clustering from sklearn.feature_extraction import image +from sklearn.cluster import spectral_clustering + # load the coins as a numpy array orig_coins = coins() diff --git a/examples/cluster/plot_color_quantization.py b/examples/cluster/plot_color_quantization.py index c03a61bac6a80..6fc6cdd4a449f 100644 --- a/examples/cluster/plot_color_quantization.py +++ b/examples/cluster/plot_color_quantization.py @@ -26,15 +26,13 @@ # # License: BSD 3 clause -from time import time - -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from 
sklearn.cluster import KMeans -from sklearn.datasets import load_sample_image from sklearn.metrics import pairwise_distances_argmin +from sklearn.datasets import load_sample_image from sklearn.utils import shuffle +from time import time n_colors = 64 diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py index 9d7007e7b68a3..620ee6bdcb86e 100644 --- a/examples/cluster/plot_dbscan.py +++ b/examples/cluster/plot_dbscan.py @@ -10,11 +10,12 @@ import numpy as np -from sklearn import metrics from sklearn.cluster import DBSCAN +from sklearn import metrics from sklearn.datasets import make_blobs from sklearn.preprocessing import StandardScaler + # %% # Generate sample data # -------------------- diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py index 33e91bba67c84..18288da252024 100644 --- a/examples/cluster/plot_digits_agglomeration.py +++ b/examples/cluster/plot_digits_agglomeration.py @@ -13,10 +13,10 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn import cluster, datasets +from sklearn import datasets, cluster from sklearn.feature_extraction.image import grid_to_graph digits = datasets.load_digits() diff --git a/examples/cluster/plot_digits_linkage.py b/examples/cluster/plot_digits_linkage.py index ae67bd5d8e0f4..730f85c543356 100644 --- a/examples/cluster/plot_digits_linkage.py +++ b/examples/cluster/plot_digits_linkage.py @@ -35,7 +35,7 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn import datasets, manifold +from sklearn import manifold, datasets digits = datasets.load_digits() X, y = digits.data, digits.target diff --git a/examples/cluster/plot_face_compress.py b/examples/cluster/plot_face_compress.py index 77263d84cab5f..700d862ec6001 100644 --- a/examples/cluster/plot_face_compress.py +++ b/examples/cluster/plot_face_compress.py @@ -14,12 +14,13 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np import scipy as sp +import matplotlib.pyplot as plt from sklearn import cluster + try: # SciPy >= 0.16 have face in misc from scipy.misc import face diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py index f28ed194d2426..e2273326b9a12 100644 --- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py +++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py @@ -21,17 +21,18 @@ import shutil import tempfile -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from scipy import linalg, ndimage - from joblib import Memory + +from sklearn.feature_extraction.image import grid_to_graph from sklearn import feature_selection from sklearn.cluster import FeatureAgglomeration -from sklearn.feature_extraction.image import grid_to_graph from sklearn.linear_model import BayesianRidge -from sklearn.model_selection import GridSearchCV, KFold from sklearn.pipeline import Pipeline +from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import KFold # %% # Set parameters diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py index b6464459160e3..e395571a1caad 100644 --- a/examples/cluster/plot_inductive_clustering.py +++ b/examples/cluster/plot_inductive_clustering.py 
@@ -24,7 +24,6 @@ # Christos Aridas import matplotlib.pyplot as plt - from sklearn.base import BaseEstimator, clone from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import make_blobs @@ -33,6 +32,7 @@ from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import check_is_fitted + N_SAMPLES = 5000 RANDOM_STATE = 42 diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index d71f884839c1d..94f8ff6c58f52 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ b/examples/cluster/plot_kmeans_assumptions.py @@ -14,8 +14,8 @@ # Author: Phil Roth # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.cluster import KMeans from sklearn.datasets import make_blobs diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py index a048a8c1ee11c..fc79c867a8589 100644 --- a/examples/cluster/plot_kmeans_digits.py +++ b/examples/cluster/plot_kmeans_digits.py @@ -34,7 +34,6 @@ # to group images such that the handwritten digits on the image are the same. import numpy as np - from sklearn.datasets import load_digits data, labels = load_digits(return_X_y=True) @@ -54,7 +53,6 @@ # * train and time the pipeline fitting; # * measure the performance of the clustering obtained via different metrics. from time import time - from sklearn import metrics from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler diff --git a/examples/cluster/plot_kmeans_plusplus.py b/examples/cluster/plot_kmeans_plusplus.py index 66206ce7d7038..eea2c2ec85093 100644 --- a/examples/cluster/plot_kmeans_plusplus.py +++ b/examples/cluster/plot_kmeans_plusplus.py @@ -10,10 +10,9 @@ """ -import matplotlib.pyplot as plt - from sklearn.cluster import kmeans_plusplus from sklearn.datasets import make_blobs +import matplotlib.pyplot as plt # Generate sample data n_samples = 4000 diff --git a/examples/cluster/plot_kmeans_silhouette_analysis.py b/examples/cluster/plot_kmeans_silhouette_analysis.py index a999e83fcac5d..8f4e241100e24 100644 --- a/examples/cluster/plot_kmeans_silhouette_analysis.py +++ b/examples/cluster/plot_kmeans_silhouette_analysis.py @@ -31,14 +31,14 @@ """ -import matplotlib.cm as cm -import matplotlib.pyplot as plt -import numpy as np - -from sklearn.cluster import KMeans from sklearn.datasets import make_blobs +from sklearn.cluster import KMeans from sklearn.metrics import silhouette_samples, silhouette_score +import matplotlib.pyplot as plt +import matplotlib.cm as cm +import numpy as np + # Generating the sample data from make_blobs # This particular setting has one distinct cluster and 3 clusters placed close # together. 
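The silhouette-analysis hunk ends here; as a compact reminder of what that example computes, the mean silhouette coefficient can be used to compare candidate cluster counts. A minimal sketch on synthetic blobs, with illustrative values:

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

X, _ = make_blobs(n_samples=500, centers=4, cluster_std=1.0, random_state=1)

# Higher mean silhouette indicates denser, better-separated clusters.
for n_clusters in [2, 3, 4, 5, 6]:
    cluster_labels = KMeans(n_clusters=n_clusters, random_state=10).fit_predict(X)
    score = silhouette_score(X, cluster_labels)
    print("n_clusters = %d, mean silhouette: %.3f" % (n_clusters, score))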
diff --git a/examples/cluster/plot_kmeans_stability_low_dim_dense.py b/examples/cluster/plot_kmeans_stability_low_dim_dense.py index 7d8b903ab9623..a375c1cd103d7 100644 --- a/examples/cluster/plot_kmeans_stability_low_dim_dense.py +++ b/examples/cluster/plot_kmeans_stability_low_dim_dense.py @@ -26,12 +26,14 @@ # Author: Olivier Grisel # License: BSD 3 clause -import matplotlib.cm as cm -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt +import matplotlib.cm as cm -from sklearn.cluster import KMeans, MiniBatchKMeans -from sklearn.utils import check_random_state, shuffle +from sklearn.utils import shuffle +from sklearn.utils import check_random_state +from sklearn.cluster import MiniBatchKMeans +from sklearn.cluster import KMeans random_state = np.random.RandomState(0) diff --git a/examples/cluster/plot_linkage_comparison.py b/examples/cluster/plot_linkage_comparison.py index dc009d0110f7c..af4c3cd2894af 100644 --- a/examples/cluster/plot_linkage_comparison.py +++ b/examples/cluster/plot_linkage_comparison.py @@ -25,13 +25,13 @@ import time import warnings -from itertools import cycle, islice -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn import cluster, datasets from sklearn.preprocessing import StandardScaler +from itertools import cycle, islice np.random.seed(0) diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py index 0afa0bde35165..6a6827e5aa49d 100644 --- a/examples/cluster/plot_mean_shift.py +++ b/examples/cluster/plot_mean_shift.py @@ -12,7 +12,6 @@ """ import numpy as np - from sklearn.cluster import MeanShift, estimate_bandwidth from sklearn.datasets import make_blobs @@ -39,12 +38,11 @@ print("number of estimated clusters : %d" % n_clusters_) -from itertools import cycle - # %% # Plot result # ----------- import matplotlib.pyplot as plt +from itertools import cycle plt.figure(1) plt.clf() diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py index cd279781a4b03..de0a185949972 100644 --- a/examples/cluster/plot_mini_batch_kmeans.py +++ b/examples/cluster/plot_mini_batch_kmeans.py @@ -21,7 +21,6 @@ # We start by generating the blobs of data to be clustered. import numpy as np - from sklearn.datasets import make_blobs np.random.seed(0) @@ -36,7 +35,6 @@ # ------------------------------ import time - from sklearn.cluster import KMeans k_means = KMeans(init="k-means++", n_clusters=3, n_init=10) diff --git a/examples/cluster/plot_optics.py b/examples/cluster/plot_optics.py index c15de1e5a989d..5956a2d47afa5 100644 --- a/examples/cluster/plot_optics.py +++ b/examples/cluster/plot_optics.py @@ -20,12 +20,11 @@ # Adrin Jalali # License: BSD 3 clause +from sklearn.cluster import OPTICS, cluster_optics_dbscan import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt import numpy as np -from sklearn.cluster import OPTICS, cluster_optics_dbscan - # Generate sample data np.random.seed(0) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index cde504c0d82af..0880cdb893839 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -75,12 +75,11 @@ # that is close to a Voronoi partition graph.data = np.exp(-graph.data / graph.data.std()) -import matplotlib.pyplot as plt - # %% # Here we perform spectral clustering using the arpack solver since amg is # numerically unstable on this example. We then plot the results. 
from sklearn.cluster import spectral_clustering +import matplotlib.pyplot as plt labels = spectral_clustering(graph, n_clusters=4, eigen_solver="arpack") label_im = np.full(mask.shape, -1.0) diff --git a/examples/cluster/plot_ward_structured_vs_unstructured.py b/examples/cluster/plot_ward_structured_vs_unstructured.py index 42a628241ba2e..430d00a8b3730 100644 --- a/examples/cluster/plot_ward_structured_vs_unstructured.py +++ b/examples/cluster/plot_ward_structured_vs_unstructured.py @@ -27,14 +27,13 @@ import time as time -import mpl_toolkits.mplot3d # noqa: F401 -import numpy as np - -from sklearn.datasets import make_swiss_roll - # The following import is required # for 3D projection to work with matplotlib < 3.2 +import mpl_toolkits.mplot3d # noqa: F401 + +import numpy as np + # %% # Generate data @@ -42,6 +41,7 @@ # # We start by generating the Swiss Roll dataset. +from sklearn.datasets import make_swiss_roll n_samples = 1500 noise = 0.05 diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py index 669e817cbf81d..d4798d828b321 100644 --- a/examples/compose/plot_column_transformer.py +++ b/examples/compose/plot_column_transformer.py @@ -24,14 +24,14 @@ import numpy as np -from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import FunctionTransformer from sklearn.datasets import fetch_20newsgroups from sklearn.decomposition import TruncatedSVD from sklearn.feature_extraction import DictVectorizer from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics import classification_report from sklearn.pipeline import Pipeline -from sklearn.preprocessing import FunctionTransformer +from sklearn.compose import ColumnTransformer from sklearn.svm import LinearSVC ############################################################################## diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 3b6d648a80f27..2a801405fc1c3 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -33,11 +33,11 @@ from sklearn.compose import ColumnTransformer from sklearn.datasets import fetch_openml +from sklearn.pipeline import Pipeline from sklearn.impute import SimpleImputer +from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.model_selection import train_test_split, GridSearchCV np.random.seed(0) diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py index bbcb9213da0f6..cd2c65021c4d4 100644 --- a/examples/compose/plot_compare_reduction.py +++ b/examples/compose/plot_compare_reduction.py @@ -27,15 +27,14 @@ # Authors: Robert McGibbon, Joel Nothman, Guillaume Lemaitre -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.datasets import load_digits -from sklearn.decomposition import NMF, PCA -from sklearn.feature_selection import SelectKBest, chi2 from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC +from sklearn.decomposition import PCA, NMF +from sklearn.feature_selection import SelectKBest, chi2 pipe = Pipeline( [ @@ -99,9 +98,8 @@ # cache. 
Hence, use the ``memory`` constructor parameter when the fitting # of a transformer is costly. -from shutil import rmtree - from joblib import Memory +from shutil import rmtree # Create a temporary folder to store the transformers of the pipeline location = "cachedir" diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py index f81b377cee759..acd3068d991c9 100644 --- a/examples/compose/plot_digits_pipe.py +++ b/examples/compose/plot_digits_pipe.py @@ -15,15 +15,15 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt import pandas as pd from sklearn import datasets from sklearn.decomposition import PCA from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline +from sklearn.model_selection import GridSearchCV from sklearn.preprocessing import StandardScaler # Define a pipeline to search for the best combination of PCA truncation diff --git a/examples/compose/plot_feature_union.py b/examples/compose/plot_feature_union.py index 01f7e02bfe44f..e014b8b8808b9 100644 --- a/examples/compose/plot_feature_union.py +++ b/examples/compose/plot_feature_union.py @@ -20,12 +20,12 @@ # # License: BSD 3 clause +from sklearn.pipeline import Pipeline, FeatureUnion +from sklearn.model_selection import GridSearchCV +from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.decomposition import PCA from sklearn.feature_selection import SelectKBest -from sklearn.model_selection import GridSearchCV -from sklearn.pipeline import FeatureUnion, Pipeline -from sklearn.svm import SVC iris = load_iris() diff --git a/examples/compose/plot_transformed_target.py b/examples/compose/plot_transformed_target.py index 20de60f2a01bb..2454affb349cf 100644 --- a/examples/compose/plot_transformed_target.py +++ b/examples/compose/plot_transformed_target.py @@ -15,14 +15,14 @@ # Author: Guillaume Lemaitre # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn.compose import TransformedTargetRegressor from sklearn.datasets import make_regression +from sklearn.model_selection import train_test_split from sklearn.linear_model import RidgeCV +from sklearn.compose import TransformedTargetRegressor from sklearn.metrics import median_absolute_error, r2_score -from sklearn.model_selection import train_test_split # %% # Synthetic example diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index df9af8ea330ba..be3bf4837eb9f 100644 --- a/examples/covariance/plot_covariance_estimation.py +++ b/examples/covariance/plot_covariance_estimation.py @@ -37,9 +37,8 @@ # Compute the likelihood on test data # ----------------------------------- -from scipy import linalg - from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood +from scipy import linalg # spanning a range of possible shrinkage coefficient values shrinkages = np.logspace(-2, 0, 30) @@ -74,8 +73,8 @@ # are Gaussian, in particular for small samples. 
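The shrinkage comparison this covariance example performs can be summarized in a short, self-contained sketch: pick a shrinkage coefficient by cross-validation and compare it with the closed-form Ledoit-Wolf and OAS estimates. The logspace grid mirrors the code visible above; the toy data is invented.

import numpy as np

from sklearn.covariance import OAS, LedoitWolf, ShrunkCovariance
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X = rng.randn(40, 20)  # few samples relative to features, where shrinkage helps

# Covariance estimators expose a log-likelihood ``score``, so GridSearchCV
# can tune the shrinkage coefficient directly.
shrinkages = np.logspace(-2, 0, 30)
cv = GridSearchCV(ShrunkCovariance(), [{"shrinkage": shrinkages}])
cv.fit(X)

print("cross-validated shrinkage: %.3f" % cv.best_estimator_.shrinkage)
print("Ledoit-Wolf shrinkage: %.3f" % LedoitWolf().fit(X).shrinkage_)
print("OAS shrinkage: %.3f" % OAS().fit(X).shrinkage_)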
-from sklearn.covariance import OAS, LedoitWolf from sklearn.model_selection import GridSearchCV +from sklearn.covariance import LedoitWolf, OAS # GridSearch for an optimal shrinkage coefficient tuned_parameters = [{"shrinkage": shrinkages}] diff --git a/examples/covariance/plot_lw_vs_oas.py b/examples/covariance/plot_lw_vs_oas.py index 107f6bd1c29cc..1fd84b180f50a 100644 --- a/examples/covariance/plot_lw_vs_oas.py +++ b/examples/covariance/plot_lw_vs_oas.py @@ -21,11 +21,11 @@ """ -import matplotlib.pyplot as plt import numpy as np -from scipy.linalg import cholesky, toeplitz +import matplotlib.pyplot as plt +from scipy.linalg import toeplitz, cholesky -from sklearn.covariance import OAS, LedoitWolf +from sklearn.covariance import LedoitWolf, OAS np.random.seed(0) # %% diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py index bd61e5af22147..b93d68a269706 100644 --- a/examples/covariance/plot_mahalanobis_distances.py +++ b/examples/covariance/plot_mahalanobis_distances.py @@ -103,7 +103,6 @@ # designed to have a much larger variance in feature 2. import matplotlib.pyplot as plt - from sklearn.covariance import EmpiricalCovariance, MinCovDet # fit a MCD robust estimator to data diff --git a/examples/covariance/plot_robust_vs_empirical_covariance.py b/examples/covariance/plot_robust_vs_empirical_covariance.py index c543eeac16ba4..9cffa57beda0a 100644 --- a/examples/covariance/plot_robust_vs_empirical_covariance.py +++ b/examples/covariance/plot_robust_vs_empirical_covariance.py @@ -53,9 +53,9 @@ """ -import matplotlib.font_manager -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt +import matplotlib.font_manager from sklearn.covariance import EmpiricalCovariance, MinCovDet diff --git a/examples/covariance/plot_sparse_cov.py b/examples/covariance/plot_sparse_cov.py index a088aeb7e69c0..96a5486dc964e 100644 --- a/examples/covariance/plot_sparse_cov.py +++ b/examples/covariance/plot_sparse_cov.py @@ -59,7 +59,6 @@ # ----------------- import numpy as np from scipy import linalg - from sklearn.datasets import make_sparse_spd_matrix n_samples = 60 diff --git a/examples/cross_decomposition/plot_compare_cross_decomposition.py b/examples/cross_decomposition/plot_compare_cross_decomposition.py index 866c5b99584e5..deccd7aa1932c 100644 --- a/examples/cross_decomposition/plot_compare_cross_decomposition.py +++ b/examples/cross_decomposition/plot_compare_cross_decomposition.py @@ -20,10 +20,9 @@ """ -import matplotlib.pyplot as plt import numpy as np - -from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression +import matplotlib.pyplot as plt +from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA # ############################################################################# # Dataset based latent variables model diff --git a/examples/cross_decomposition/plot_pcr_vs_pls.py b/examples/cross_decomposition/plot_pcr_vs_pls.py index 9f3b5a7cf0460..529225d11eead 100644 --- a/examples/cross_decomposition/plot_pcr_vs_pls.py +++ b/examples/cross_decomposition/plot_pcr_vs_pls.py @@ -33,8 +33,6 @@ """ -import matplotlib.pyplot as plt - # %% # The data # -------- @@ -44,7 +42,7 @@ # components of this dataset, i.e. the two directions that explain the most # variance in the data. 
import numpy as np - +import matplotlib.pyplot as plt from sklearn.decomposition import PCA rng = np.random.RandomState(0) @@ -89,10 +87,6 @@ plt.tight_layout() plt.show() -from sklearn.cross_decomposition import PLSRegression -from sklearn.decomposition import PCA -from sklearn.linear_model import LinearRegression - # %% # Projection on one component and predictive power # ------------------------------------------------ @@ -107,7 +101,10 @@ # use as training data. from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline +from sklearn.linear_model import LinearRegression from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA +from sklearn.cross_decomposition import PLSRegression X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng) diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py index 8519b0e70d1de..a148dd37626e0 100644 --- a/examples/datasets/plot_digits_last_image.py +++ b/examples/datasets/plot_digits_last_image.py @@ -19,10 +19,10 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt - from sklearn import datasets +import matplotlib.pyplot as plt + # Load the digits dataset digits = datasets.load_digits() diff --git a/examples/datasets/plot_random_dataset.py b/examples/datasets/plot_random_dataset.py index e5cbdb080b59f..4f3fdbbb11ef5 100644 --- a/examples/datasets/plot_random_dataset.py +++ b/examples/datasets/plot_random_dataset.py @@ -16,7 +16,9 @@ import matplotlib.pyplot as plt -from sklearn.datasets import make_blobs, make_classification, make_gaussian_quantiles +from sklearn.datasets import make_classification +from sklearn.datasets import make_blobs +from sklearn.datasets import make_gaussian_quantiles plt.figure(figsize=(8, 8)) plt.subplots_adjust(bottom=0.05, top=0.9, left=0.05, right=0.95) diff --git a/examples/datasets/plot_random_multilabel_dataset.py b/examples/datasets/plot_random_multilabel_dataset.py index e6e2d6ad9edcf..f22c7b9695c42 100644 --- a/examples/datasets/plot_random_multilabel_dataset.py +++ b/examples/datasets/plot_random_multilabel_dataset.py @@ -35,8 +35,8 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.datasets import make_multilabel_classification as make_ml_clf diff --git a/examples/decomposition/plot_beta_divergence.py b/examples/decomposition/plot_beta_divergence.py index e06bde0a83e10..2a69f9a22ffb4 100644 --- a/examples/decomposition/plot_beta_divergence.py +++ b/examples/decomposition/plot_beta_divergence.py @@ -8,9 +8,8 @@ """ -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.decomposition._nmf import _beta_divergence x = np.linspace(0.001, 4, 1000) diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py index 54cb3fa9faac7..0eb07dc3efb2d 100644 --- a/examples/decomposition/plot_faces_decomposition.py +++ b/examples/decomposition/plot_faces_decomposition.py @@ -21,11 +21,12 @@ import logging -import matplotlib.pyplot as plt from numpy.random import RandomState +import matplotlib.pyplot as plt -from sklearn import cluster, decomposition from sklearn.datasets import fetch_olivetti_faces +from sklearn import cluster +from sklearn import decomposition rng = RandomState(0) diff --git a/examples/decomposition/plot_ica_blind_source_separation.py 
b/examples/decomposition/plot_ica_blind_source_separation.py index 2afd204a6dbdd..15945e5075ce8 100644 --- a/examples/decomposition/plot_ica_blind_source_separation.py +++ b/examples/decomposition/plot_ica_blind_source_separation.py @@ -14,11 +14,11 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from scipy import signal -from sklearn.decomposition import PCA, FastICA +from sklearn.decomposition import FastICA, PCA # ############################################################################# # Generate sample data diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py index 8d7ee613041e9..71019be17b5ba 100644 --- a/examples/decomposition/plot_image_denoising.py +++ b/examples/decomposition/plot_image_denoising.py @@ -38,6 +38,7 @@ import numpy as np import scipy as sp + try: # SciPy >= 0.16 have face in misc from scipy.misc import face diff --git a/examples/decomposition/plot_incremental_pca.py b/examples/decomposition/plot_incremental_pca.py index 8e5aeccfddc8a..adc7f83f3cda0 100644 --- a/examples/decomposition/plot_incremental_pca.py +++ b/examples/decomposition/plot_incremental_pca.py @@ -22,8 +22,8 @@ # Authors: Kyle Kastner # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.datasets import load_iris from sklearn.decomposition import PCA, IncrementalPCA diff --git a/examples/decomposition/plot_pca_3d.py b/examples/decomposition/plot_pca_3d.py index 93a35fb1a3762..e539af6d66b7a 100644 --- a/examples/decomposition/plot_pca_3d.py +++ b/examples/decomposition/plot_pca_3d.py @@ -20,6 +20,7 @@ # --------------- import numpy as np + from scipy import stats e = np.exp(1) @@ -52,13 +53,13 @@ def pdf(x): # Plot the figures # ---------------- +from sklearn.decomposition import PCA + import matplotlib.pyplot as plt # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 -from sklearn.decomposition import PCA - def plot_figs(fig_num, elev, azim): fig = plt.figure(fig_num, figsize=(4, 3)) diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py index 13789479c6af3..e42bf7cf91d7e 100644 --- a/examples/decomposition/plot_pca_iris.py +++ b/examples/decomposition/plot_pca_iris.py @@ -14,10 +14,12 @@ # Code source: Gaël Varoquaux # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt + -from sklearn import datasets, decomposition +from sklearn import decomposition +from sklearn import datasets np.random.seed(5) diff --git a/examples/decomposition/plot_pca_vs_fa_model_selection.py b/examples/decomposition/plot_pca_vs_fa_model_selection.py index e269fc6b5c278..4c934ab756c3e 100644 --- a/examples/decomposition/plot_pca_vs_fa_model_selection.py +++ b/examples/decomposition/plot_pca_vs_fa_model_selection.py @@ -34,6 +34,7 @@ # --------------- import numpy as np + from scipy import linalg n_samples, n_features, rank = 500, 25, 5 @@ -55,9 +56,10 @@ import matplotlib.pyplot as plt -from sklearn.covariance import LedoitWolf, ShrunkCovariance from sklearn.decomposition import PCA, FactorAnalysis -from sklearn.model_selection import GridSearchCV, cross_val_score +from sklearn.covariance import ShrunkCovariance, LedoitWolf +from sklearn.model_selection import cross_val_score +from sklearn.model_selection import GridSearchCV n_components = np.arange(0, n_features, 5) # options for n_components diff 
--git a/examples/decomposition/plot_sparse_coding.py b/examples/decomposition/plot_sparse_coding.py index c45cd3c83b04f..4f4602f1ff1ac 100644 --- a/examples/decomposition/plot_sparse_coding.py +++ b/examples/decomposition/plot_sparse_coding.py @@ -16,8 +16,8 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.decomposition import SparseCoder diff --git a/examples/decomposition/plot_varimax_fa.py b/examples/decomposition/plot_varimax_fa.py index 97d6cf8fdf54b..0103ccc65b5d6 100644 --- a/examples/decomposition/plot_varimax_fa.py +++ b/examples/decomposition/plot_varimax_fa.py @@ -22,9 +22,9 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.datasets import load_iris -from sklearn.decomposition import PCA, FactorAnalysis +from sklearn.decomposition import FactorAnalysis, PCA from sklearn.preprocessing import StandardScaler +from sklearn.datasets import load_iris # %% # Load Iris data diff --git a/examples/ensemble/plot_adaboost_hastie_10_2.py b/examples/ensemble/plot_adaboost_hastie_10_2.py index c2c791aa6ff67..4a61639cb2494 100644 --- a/examples/ensemble/plot_adaboost_hastie_10_2.py +++ b/examples/ensemble/plot_adaboost_hastie_10_2.py @@ -94,7 +94,6 @@ # added to the ensemble. import numpy as np - from sklearn.metrics import zero_one_loss ada_discrete_err = np.zeros((n_estimators,)) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 3f7af092b0a6c..c94cc94959576 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -35,6 +35,7 @@ from sklearn.metrics import accuracy_score from sklearn.tree import DecisionTreeClassifier + X, y = make_gaussian_quantiles( n_samples=13000, n_features=10, n_classes=3, random_state=1 ) diff --git a/examples/ensemble/plot_adaboost_regression.py b/examples/ensemble/plot_adaboost_regression.py index 65f0879c40804..e60051d1c25a0 100644 --- a/examples/ensemble/plot_adaboost_regression.py +++ b/examples/ensemble/plot_adaboost_regression.py @@ -17,13 +17,11 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt - # importing necessary libraries import numpy as np - -from sklearn.ensemble import AdaBoostRegressor +import matplotlib.pyplot as plt from sklearn.tree import DecisionTreeRegressor +from sklearn.ensemble import AdaBoostRegressor # Create the dataset rng = np.random.RandomState(1) diff --git a/examples/ensemble/plot_adaboost_twoclass.py b/examples/ensemble/plot_adaboost_twoclass.py index d1e89c47b7fcf..19679c6285d3b 100644 --- a/examples/ensemble/plot_adaboost_twoclass.py +++ b/examples/ensemble/plot_adaboost_twoclass.py @@ -21,13 +21,14 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn.datasets import make_gaussian_quantiles from sklearn.ensemble import AdaBoostClassifier -from sklearn.inspection import DecisionBoundaryDisplay from sklearn.tree import DecisionTreeClassifier +from sklearn.datasets import make_gaussian_quantiles +from sklearn.inspection import DecisionBoundaryDisplay + # Construct dataset X1, y1 = make_gaussian_quantiles( diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py index 3a8909436fa1c..f8868a7003e4c 100644 --- a/examples/ensemble/plot_bias_variance.py +++ b/examples/ensemble/plot_bias_variance.py @@ -66,8 +66,8 @@ # Author: Gilles Louppe # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import 
matplotlib.pyplot as plt from sklearn.ensemble import BaggingRegressor from sklearn.tree import DecisionTreeRegressor diff --git a/examples/ensemble/plot_ensemble_oob.py b/examples/ensemble/plot_ensemble_oob.py index 972ca1f6259aa..bd678af42a7d1 100644 --- a/examples/ensemble/plot_ensemble_oob.py +++ b/examples/ensemble/plot_ensemble_oob.py @@ -26,10 +26,9 @@ # # License: BSD 3 Clause -from collections import OrderedDict - import matplotlib.pyplot as plt +from collections import OrderedDict from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py index 45db0afb2ec4d..409396a0376b8 100644 --- a/examples/ensemble/plot_feature_transformation.py +++ b/examples/ensemble/plot_feature_transformation.py @@ -59,7 +59,7 @@ # First, we will start by training the random forest and gradient boosting on # the separated training set -from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier +from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier random_forest = RandomForestClassifier( n_estimators=n_estimators, max_depth=max_depth, random_state=10 @@ -100,7 +100,8 @@ # method `apply`. The pipeline in scikit-learn expects a call to `transform`. # Therefore, we wrapped the call to `apply` within a `FunctionTransformer`. -from sklearn.preprocessing import FunctionTransformer, OneHotEncoder +from sklearn.preprocessing import FunctionTransformer +from sklearn.preprocessing import OneHotEncoder def rf_apply(X, model): @@ -137,7 +138,6 @@ def gbdt_apply(X, model): # We can finally show the different ROC curves for all the models. import matplotlib.pyplot as plt - from sklearn.metrics import RocCurveDisplay fig, ax = plt.subplots() diff --git a/examples/ensemble/plot_forest_importances.py b/examples/ensemble/plot_forest_importances.py index 269451168dd7a..fbda63b26faee 100644 --- a/examples/ensemble/plot_forest_importances.py +++ b/examples/ensemble/plot_forest_importances.py @@ -57,7 +57,6 @@ # cardinality** features (many unique values). See # :ref:`permutation_importance` as an alternative below. import time - import numpy as np start_time = time.time() diff --git a/examples/ensemble/plot_forest_importances_faces.py b/examples/ensemble/plot_forest_importances_faces.py index 8b8e8751ec5a2..3848873c297de 100644 --- a/examples/ensemble/plot_forest_importances_faces.py +++ b/examples/ensemble/plot_forest_importances_faces.py @@ -59,7 +59,6 @@ # cardinality** features (many unique values). See # :ref:`permutation_importance` as an alternative. 
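Both forest-importances hunks above point to permutation importance as the alternative to impurity-based (MDI) rankings for high-cardinality features. A minimal sketch of the two, on invented data:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance

X, y = make_classification(n_samples=500, n_features=8, n_informative=3, random_state=0)
forest = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)

# MDI comes for free with the fitted model; permutation importance
# re-scores the model on shuffled copies of each column.
print("impurity-based:", forest.feature_importances_.round(2))
result = permutation_importance(forest, X, y, n_repeats=5, random_state=0)
print("permutation:   ", result.importances_mean.round(2))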
 import time
-
 import matplotlib.pyplot as plt
 
 start_time = time.time()
diff --git a/examples/ensemble/plot_forest_iris.py b/examples/ensemble/plot_forest_iris.py
index 6aaceea88efd2..ee414db7125dc 100644
--- a/examples/ensemble/plot_forest_iris.py
+++ b/examples/ensemble/plot_forest_iris.py
@@ -42,15 +42,15 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap
 
 from sklearn.datasets import load_iris
 from sklearn.ensemble import (
-    AdaBoostClassifier,
-    ExtraTreesClassifier,
     RandomForestClassifier,
+    ExtraTreesClassifier,
+    AdaBoostClassifier,
 )
 from sklearn.tree import DecisionTreeClassifier
diff --git a/examples/ensemble/plot_gradient_boosting_categorical.py b/examples/ensemble/plot_gradient_boosting_categorical.py
index 2b01218d19130..6eca645654086 100644
--- a/examples/ensemble/plot_gradient_boosting_categorical.py
+++ b/examples/ensemble/plot_gradient_boosting_categorical.py
@@ -76,9 +76,10 @@
 # As a baseline, we create an estimator where the categorical features are
 # dropped:
 
-from sklearn.compose import make_column_selector, make_column_transformer
 from sklearn.ensemble import HistGradientBoostingRegressor
 from sklearn.pipeline import make_pipeline
+from sklearn.compose import make_column_transformer
+from sklearn.compose import make_column_selector
 
 dropper = make_column_transformer(
     ("drop", make_column_selector(dtype_include="category")), remainder="passthrough"
 )
@@ -112,9 +113,8 @@
 # were ordered quantities, i.e. the categories will be encoded as 0, 1, 2,
 # etc., and treated as continuous features.
 
-import numpy as np
-
 from sklearn.preprocessing import OrdinalEncoder
+import numpy as np
 
 ordinal_encoder = make_column_transformer(
     (
@@ -161,9 +161,8 @@
 # models performance in terms of
 # :func:`~metrics.mean_absolute_percentage_error` and fit times.
 
-import matplotlib.pyplot as plt
-
 from sklearn.model_selection import cross_validate
+import matplotlib.pyplot as plt
 
 scoring = "neg_mean_absolute_percentage_error"
 n_cv_folds = 3
diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py
index f271f80a07c55..6f1013eed9564 100644
--- a/examples/ensemble/plot_gradient_boosting_early_stopping.py
+++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -38,10 +38,11 @@
 
 import time
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
-from sklearn import datasets, ensemble
+from sklearn import ensemble
+from sklearn import datasets
 from sklearn.model_selection import train_test_split
 
 data_list = [
diff --git a/examples/ensemble/plot_gradient_boosting_oob.py b/examples/ensemble/plot_gradient_boosting_oob.py
index 5a3a5b6fbf11a..8182eafc2969a 100644
--- a/examples/ensemble/plot_gradient_boosting_oob.py
+++ b/examples/ensemble/plot_gradient_boosting_oob.py
@@ -29,12 +29,14 @@
 #
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
-from scipy.special import expit
+import matplotlib.pyplot as plt
 
 from sklearn import ensemble
-from sklearn.model_selection import KFold, train_test_split
+from sklearn.model_selection import KFold
+from sklearn.model_selection import train_test_split
+
+from scipy.special import expit
 
 # Generate data (adapted from G. Ridgeway's gbm example)
 n_samples = 1000
diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py
index 37baf7639c4aa..9e823439b948b 100644
--- a/examples/ensemble/plot_gradient_boosting_quantile.py
+++ b/examples/ensemble/plot_gradient_boosting_quantile.py
@@ -12,7 +12,6 @@
 # Generate some data for a synthetic regression problem by applying the
 # function f to uniformly sampled random inputs.
 import numpy as np
-
 from sklearn.model_selection import train_test_split
 
 
@@ -59,6 +58,7 @@ def f(x):
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.metrics import mean_pinball_loss, mean_squared_error
 
+
 all_models = {}
 common_params = dict(
     learning_rate=0.05,
@@ -88,6 +88,7 @@ def f(x):
 # 90% interval (from 5th to 95th conditional percentiles).
 import matplotlib.pyplot as plt
 
+
 y_pred = all_models["mse"].predict(xx)
 y_lower = all_models["q 0.05"].predict(xx)
 y_upper = all_models["q 0.95"].predict(xx)
@@ -230,12 +231,11 @@ def coverage_fraction(y, y_low, y_high):
 # of the 5th percentile by selecting the best model parameters by
 # cross-validation on the pinball loss with alpha=0.05:
 
-from pprint import pprint
-
 # %%
 from sklearn.experimental import enable_halving_search_cv  # noqa
-from sklearn.metrics import make_scorer
 from sklearn.model_selection import HalvingRandomSearchCV
+from sklearn.metrics import make_scorer
+from pprint import pprint
 
 param_grid = dict(
     learning_rate=[0.05, 0.1, 0.2],
diff --git a/examples/ensemble/plot_gradient_boosting_regression.py b/examples/ensemble/plot_gradient_boosting_regression.py
index dacfea9a4d39a..dc29bfbda8f77 100644
--- a/examples/ensemble/plot_gradient_boosting_regression.py
+++ b/examples/ensemble/plot_gradient_boosting_regression.py
@@ -23,7 +23,6 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-
 from sklearn import datasets, ensemble
 from sklearn.inspection import permutation_importance
 from sklearn.metrics import mean_squared_error
diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py
index 45924dd611f6b..28190c16b631e 100644
--- a/examples/ensemble/plot_gradient_boosting_regularization.py
+++ b/examples/ensemble/plot_gradient_boosting_regularization.py
@@ -25,10 +25,12 @@
 #
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn import ensemble
+from sklearn import datasets
 
-from sklearn import datasets, ensemble
 from sklearn.model_selection import train_test_split
 
 X, y = datasets.make_hastie_10_2(n_samples=4000, random_state=1)
diff --git a/examples/ensemble/plot_isolation_forest.py b/examples/ensemble/plot_isolation_forest.py
index 587f62fb14ba0..5ffe9eb799ac9 100644
--- a/examples/ensemble/plot_isolation_forest.py
+++ b/examples/ensemble/plot_isolation_forest.py
@@ -23,9 +23,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn.ensemble import IsolationForest
 
 rng = np.random.RandomState(42)
diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py
index 0076ec0115584..fc1ced458b09e 100644
--- a/examples/ensemble/plot_monotonic_constraints.py
+++ b/examples/ensemble/plot_monotonic_constraints.py
@@ -20,11 +20,11 @@
 
 """
 
-import matplotlib.pyplot as plt
-import numpy as np
-
 from sklearn.ensemble import HistGradientBoostingRegressor
 from sklearn.inspection import PartialDependenceDisplay
+import numpy as np
+import matplotlib.pyplot as plt
+
 
 rng = np.random.RandomState(0)
diff --git a/examples/ensemble/plot_random_forest_embedding.py b/examples/ensemble/plot_random_forest_embedding.py
index fe26e04ca7789..000b83e67b92a 100644
--- a/examples/ensemble/plot_random_forest_embedding.py
+++ b/examples/ensemble/plot_random_forest_embedding.py
@@ -26,12 +26,12 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 from sklearn.datasets import make_circles
+from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
 from sklearn.decomposition import TruncatedSVD
-from sklearn.ensemble import ExtraTreesClassifier, RandomTreesEmbedding
 from sklearn.naive_bayes import BernoulliNB
 
 # make a synthetic dataset
diff --git a/examples/ensemble/plot_random_forest_regression_multioutput.py b/examples/ensemble/plot_random_forest_regression_multioutput.py
index ce8346c329127..4b3d4f4a9a728 100644
--- a/examples/ensemble/plot_random_forest_regression_multioutput.py
+++ b/examples/ensemble/plot_random_forest_regression_multioutput.py
@@ -25,13 +25,13 @@
 #
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import train_test_split
 from sklearn.multioutput import MultiOutputRegressor
 
+
 # Create a random dataset
 rng = np.random.RandomState(1)
 X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
diff --git a/examples/ensemble/plot_stack_predictors.py b/examples/ensemble/plot_stack_predictors.py
index c6da319e83a7c..a311f5966880c 100644
--- a/examples/ensemble/plot_stack_predictors.py
+++ b/examples/ensemble/plot_stack_predictors.py
@@ -132,7 +132,8 @@ def load_ames_housing():
 # Then, we will now define the preprocessor used when the ending regressor
 # is a linear model.
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import StandardScaler
 
 cat_linear_processor = OneHotEncoder(handle_unknown="ignore")
 num_linear_processor = make_pipeline(
@@ -209,10 +210,8 @@ def load_ames_housing():
 
 import time
-
 import matplotlib.pyplot as plt
-
-from sklearn.model_selection import cross_val_predict, cross_validate
+from sklearn.model_selection import cross_validate, cross_val_predict
 
 
 def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):
diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 90441c6d28339..e6dc68eeadf98 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -28,11 +28,11 @@
 import matplotlib.pyplot as plt
 
 from sklearn import datasets
-from sklearn.ensemble import VotingClassifier
-from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.tree import DecisionTreeClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.svm import SVC
-from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import VotingClassifier
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # Loading some example data
 iris = datasets.load_iris()
diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py
index 14f4f4330c045..54c290c3073e0 100644
--- a/examples/ensemble/plot_voting_probas.py
+++ b/examples/ensemble/plot_voting_probas.py
@@ -23,12 +23,13 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
-from sklearn.ensemble import RandomForestClassifier, VotingClassifier
 from sklearn.linear_model import LogisticRegression
 from sklearn.naive_bayes import GaussianNB
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import VotingClassifier
 
 clf1 = LogisticRegression(max_iter=1000, random_state=123)
 clf2 = RandomForestClassifier(n_estimators=100, random_state=123)
diff --git a/examples/ensemble/plot_voting_regressor.py b/examples/ensemble/plot_voting_regressor.py
index d33becca505e3..23e709cc9e62a 100644
--- a/examples/ensemble/plot_voting_regressor.py
+++ b/examples/ensemble/plot_voting_regressor.py
@@ -26,12 +26,10 @@
 import matplotlib.pyplot as plt
 
 from sklearn.datasets import load_diabetes
-from sklearn.ensemble import (
-    GradientBoostingRegressor,
-    RandomForestRegressor,
-    VotingRegressor,
-)
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import VotingRegressor
 
 # %%
 # Training classifiers
diff --git a/examples/exercises/plot_cv_digits.py b/examples/exercises/plot_cv_digits.py
index ebad3a55098b5..e43bbd86bb027 100644
--- a/examples/exercises/plot_cv_digits.py
+++ b/examples/exercises/plot_cv_digits.py
@@ -11,9 +11,8 @@
 
 """
 
 import numpy as np
-
-from sklearn import datasets, svm
 from sklearn.model_selection import cross_val_score
+from sklearn import datasets, svm
 
 X, y = datasets.load_digits(return_X_y=True)
diff --git a/examples/exercises/plot_digits_classification_exercise.py b/examples/exercises/plot_digits_classification_exercise.py
index 25b0171c66421..877e615659743 100644
--- a/examples/exercises/plot_digits_classification_exercise.py
+++ b/examples/exercises/plot_digits_classification_exercise.py
@@ -12,7 +12,7 @@
 
 """
 
-from sklearn import datasets, linear_model, neighbors
+from sklearn import datasets, neighbors, linear_model
 
 X_digits, y_digits = datasets.load_digits(return_X_y=True)
 X_digits = X_digits / X_digits.max()
diff --git a/examples/exercises/plot_iris_exercise.py b/examples/exercises/plot_iris_exercise.py
index 07687b920e1b8..74da8c27889c9 100644
--- a/examples/exercises/plot_iris_exercise.py
+++ b/examples/exercises/plot_iris_exercise.py
@@ -10,9 +10,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn import datasets, svm
 
 iris = datasets.load_iris()
diff --git a/examples/feature_selection/plot_f_test_vs_mi.py b/examples/feature_selection/plot_f_test_vs_mi.py
index d6b5d13da6042..a8cfc5d426bbc 100644
--- a/examples/feature_selection/plot_f_test_vs_mi.py
+++ b/examples/feature_selection/plot_f_test_vs_mi.py
@@ -23,9 +23,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn.feature_selection import f_regression, mutual_info_regression
 
 np.random.seed(0)
diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py
index 2cf64cb6ea598..33ac2bd558dc5 100644
--- a/examples/feature_selection/plot_feature_selection.py
+++ b/examples/feature_selection/plot_feature_selection.py
@@ -21,7 +21,6 @@
 # --------------------
 #
 import numpy as np
-
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
diff --git a/examples/feature_selection/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py
index 553f38f9c674f..9684f5fabd383 100644
--- a/examples/feature_selection/plot_rfe_digits.py
+++ b/examples/feature_selection/plot_rfe_digits.py
@@ -12,11 +12,10 @@
 
 """  # noqa: E501
 
-import matplotlib.pyplot as plt
-
+from sklearn.svm import SVC
 from sklearn.datasets import load_digits
 from sklearn.feature_selection import RFE
-from sklearn.svm import SVC
+import matplotlib.pyplot as plt
 
 # Load the digits dataset
 digits = load_digits()
diff --git a/examples/feature_selection/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py
index f44f086f96842..4ebd4b55730c8 100644
--- a/examples/feature_selection/plot_rfe_with_cross_validation.py
+++ b/examples/feature_selection/plot_rfe_with_cross_validation.py
@@ -9,11 +9,10 @@
 
 """
 
 import matplotlib.pyplot as plt
-
-from sklearn.datasets import make_classification
-from sklearn.feature_selection import RFECV
-from sklearn.model_selection import StratifiedKFold
 from sklearn.svm import SVC
+from sklearn.model_selection import StratifiedKFold
+from sklearn.feature_selection import RFECV
+from sklearn.datasets import make_classification
 
 # Build a classification task using 3 informative features
 X, y = make_classification(
diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py
index 34764d72c7c35..16f63868feae0 100644
--- a/examples/feature_selection/plot_select_from_model_diabetes.py
+++ b/examples/feature_selection/plot_select_from_model_diabetes.py
@@ -46,7 +46,6 @@
 # :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`.
 import matplotlib.pyplot as plt
 import numpy as np
-
 from sklearn.linear_model import RidgeCV
 
 ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5)).fit(X, y)
@@ -56,8 +55,6 @@
 plt.title("Feature importances via coefficients")
 plt.show()
 
-from time import time
-
 # %%
 # Selecting features based on importance
 # --------------------------------------
@@ -71,6 +68,7 @@
 # Since we want to select only 2 features, we will set this threshold slightly
 # above the coefficient of third most important feature.
 from sklearn.feature_selection import SelectFromModel
+from time import time
 
 threshold = np.sort(importance)[-3] + 0.01
diff --git a/examples/gaussian_process/plot_compare_gpr_krr.py b/examples/gaussian_process/plot_compare_gpr_krr.py
index a6136506341ee..42c013523f79c 100644
--- a/examples/gaussian_process/plot_compare_gpr_krr.py
+++ b/examples/gaussian_process/plot_compare_gpr_krr.py
@@ -125,7 +125,6 @@
 #
 # Thus, let's use such a :class:`~sklearn.kernel_ridge.KernelRidge`.
 import time
-
 from sklearn.gaussian_process.kernels import ExpSineSquared
 from sklearn.kernel_ridge import KernelRidge
diff --git a/examples/gaussian_process/plot_gpc.py b/examples/gaussian_process/plot_gpc.py
index 21a99065e06ce..e2d78fa23f09e 100644
--- a/examples/gaussian_process/plot_gpc.py
+++ b/examples/gaussian_process/plot_gpc.py
@@ -27,11 +27,13 @@
 # License: BSD 3 clause
 
 import numpy as np
+
 from matplotlib import pyplot as plt
+from sklearn.metrics import accuracy_score, log_loss
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF
-from sklearn.metrics import accuracy_score, log_loss
+
 
 # Generate data
 train_size = 50
diff --git a/examples/gaussian_process/plot_gpc_iris.py b/examples/gaussian_process/plot_gpc_iris.py
index 88c536d8824c8..ce0ed066a1377 100644
--- a/examples/gaussian_process/plot_gpc_iris.py
+++ b/examples/gaussian_process/plot_gpc_iris.py
@@ -10,9 +10,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn import datasets
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF
diff --git a/examples/gaussian_process/plot_gpc_isoprobability.py b/examples/gaussian_process/plot_gpc_isoprobability.py
index 10785b279f1f0..84f1ecb98bd3c 100644
--- a/examples/gaussian_process/plot_gpc_isoprobability.py
+++ b/examples/gaussian_process/plot_gpc_isoprobability.py
@@ -15,12 +15,12 @@
 # License: BSD 3 clause
 
 import numpy as np
-from matplotlib import cm
+
 from matplotlib import pyplot as plt
+from matplotlib import cm
 
 from sklearn.gaussian_process import GaussianProcessClassifier
-from sklearn.gaussian_process.kernels import ConstantKernel as C
-from sklearn.gaussian_process.kernels import DotProduct
+from sklearn.gaussian_process.kernels import DotProduct, ConstantKernel as C
 
 # A few constants
 lim = 8
diff --git a/examples/gaussian_process/plot_gpc_xor.py b/examples/gaussian_process/plot_gpc_xor.py
index d963ce6aaea40..6eebbcf80098e 100644
--- a/examples/gaussian_process/plot_gpc_xor.py
+++ b/examples/gaussian_process/plot_gpc_xor.py
@@ -15,12 +15,13 @@
 #
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.gaussian_process.kernels import RBF, DotProduct
 
+
 xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50))
 rng = np.random.RandomState(0)
 X = rng.randn(200, 2)
diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py
index a3acd1dbfcbd3..bfc1c21631b26 100644
--- a/examples/gaussian_process/plot_gpr_co2.py
+++ b/examples/gaussian_process/plot_gpr_co2.py
@@ -172,7 +172,6 @@
 # Thus, we create synthetic data from 1958 to the current month. In addition,
 # we need to add the subtracted mean computed during training.
 import datetime
-
 import numpy as np
 
 today = datetime.datetime.now()
diff --git a/examples/gaussian_process/plot_gpr_on_structured_data.py b/examples/gaussian_process/plot_gpr_on_structured_data.py
index 58d567b52781f..bc8c169c91f67 100644
--- a/examples/gaussian_process/plot_gpr_on_structured_data.py
+++ b/examples/gaussian_process/plot_gpr_on_structured_data.py
@@ -38,12 +38,13 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
+from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
+from sklearn.gaussian_process.kernels import GenericKernelMixin
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.base import clone
-from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import GenericKernelMixin, Hyperparameter, Kernel
 
 
 class SequenceKernel(GenericKernelMixin, Kernel):
diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py
index 9dc8b6c831710..d83922817e5de 100644
--- a/examples/impute/plot_iterative_imputer_variants_comparison.py
+++ b/examples/impute/plot_iterative_imputer_variants_comparison.py
@@ -44,21 +44,21 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 import pandas as pd
 
-from sklearn.datasets import fetch_california_housing
-from sklearn.ensemble import RandomForestRegressor
-
 # To use this experimental feature, we need to explicitly ask for it:
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.impute import IterativeImputer, SimpleImputer
-from sklearn.kernel_approximation import Nystroem
+from sklearn.datasets import fetch_california_housing
+from sklearn.impute import SimpleImputer
+from sklearn.impute import IterativeImputer
 from sklearn.linear_model import BayesianRidge, Ridge
-from sklearn.model_selection import cross_val_score
+from sklearn.kernel_approximation import Nystroem
+from sklearn.ensemble import RandomForestRegressor
 from sklearn.neighbors import KNeighborsRegressor
 from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import cross_val_score
 
 N_SPLITS = 5
diff --git a/examples/impute/plot_missing_values.py b/examples/impute/plot_missing_values.py
index 70ff87b92f056..ca800ba3602b2 100644
--- a/examples/impute/plot_missing_values.py
+++ b/examples/impute/plot_missing_values.py
@@ -44,7 +44,9 @@
 
 import numpy as np
 
-from sklearn.datasets import fetch_california_housing, load_diabetes
+from sklearn.datasets import fetch_california_housing
+from sklearn.datasets import load_diabetes
+
 
 rng = np.random.RandomState(42)
@@ -93,10 +95,11 @@ def add_missing_values(X_full, y_full):
 # To use the experimental IterativeImputer, we need to explicitly ask for it:
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer
+from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
 from sklearn.model_selection import cross_val_score
 from sklearn.pipeline import make_pipeline
 
+
 N_SPLITS = 4
 
 regressor = RandomForestRegressor(random_state=0)
@@ -258,6 +261,7 @@ def get_impute_iterative(X_missing, y_missing):
 
 import matplotlib.pyplot as plt
 
+
 n_bars = len(mses_diabetes)
 xval = np.arange(n_bars)
diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py
index 644cdcfa4a03c..3cc557c64b69c 100644
--- a/examples/inspection/plot_linear_model_coefficient_interpretation.py
+++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py
@@ -29,16 +29,13 @@
 
 """
 
-import matplotlib.pyplot as plt
-
 # %%
 import numpy as np
-import pandas as pd
 import scipy as sp
+import pandas as pd
+import matplotlib.pyplot as plt
 import seaborn as sns
 
-from sklearn.datasets import fetch_openml
-
 # %%
 # The dataset: wages
 # ------------------
@@ -47,6 +44,7 @@
 # Note that setting the parameter `as_frame` to True will retrieve the data
 # as a pandas dataframe.
 
+from sklearn.datasets import fetch_openml
 
 survey = fetch_openml(data_id=534, as_frame=True, parser="pandas")
 
@@ -145,9 +143,9 @@
 # To describe the dataset as a linear model we use a ridge regressor
 # with a very small regularization and to model the logarithm of the WAGE.
 
-from sklearn.compose import TransformedTargetRegressor
-from sklearn.linear_model import Ridge
 from sklearn.pipeline import make_pipeline
+from sklearn.linear_model import Ridge
+from sklearn.compose import TransformedTargetRegressor
 
 model = make_pipeline(
     preprocessor,
@@ -309,7 +307,8 @@
 # their robustness is not guaranteed, and they should probably be interpreted
 # with caution.
 
-from sklearn.model_selection import RepeatedKFold, cross_validate
+from sklearn.model_selection import cross_validate
+from sklearn.model_selection import RepeatedKFold
 
 cv = RepeatedKFold(n_splits=5, n_repeats=5, random_state=0)
 cv_model = cross_validate(
diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py
index c3b22aec358a1..d7480a7898424 100644
--- a/examples/inspection/plot_partial_dependence.py
+++ b/examples/inspection/plot_partial_dependence.py
@@ -43,7 +43,6 @@
 # (here the average target, by default).
 import pandas as pd
-
 from sklearn.datasets import fetch_california_housing
 from sklearn.model_selection import train_test_split
 
@@ -72,10 +71,9 @@
 # single-variable partial dependence plots.
 from time import time
-
-from sklearn.neural_network import MLPRegressor
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import QuantileTransformer
+from sklearn.neural_network import MLPRegressor
 
 print("Training MLPRegressor...")
 tic = time()
@@ -249,9 +247,6 @@
 )
 display.figure_.subplots_adjust(wspace=0.4, hspace=0.3)
 
-# unused but required import for doing 3d projections with matplotlib < 3.2
-import mpl_toolkits.mplot3d  # noqa: F401
-
 # %%
 # The two-way partial dependence plot shows the dependence of median house
 # price on joint values of house age and average occupants per household. We
@@ -267,6 +262,9 @@
 # this time in 3 dimensions.
 import numpy as np
 
+# unused but required import for doing 3d projections with matplotlib < 3.2
+import mpl_toolkits.mplot3d  # noqa: F401
+
 from sklearn.inspection import partial_dependence
 
 fig = plt.figure()
diff --git a/examples/inspection/plot_permutation_importance.py b/examples/inspection/plot_permutation_importance.py
index 8840768d7ea39..9e3724687a306 100644
--- a/examples/inspection/plot_permutation_importance.py
+++ b/examples/inspection/plot_permutation_importance.py
@@ -56,8 +56,6 @@
 X = X[categorical_columns + numerical_columns]
 X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
 
-from sklearn.compose import ColumnTransformer
-
 # %%
 # We define a predictive model based on a random forest. Therefore, we will make
 # the following preprocessing steps:
@@ -68,6 +66,7 @@
 # numerical features using a mean strategy.
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.impute import SimpleImputer
+from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import OrdinalEncoder
diff --git a/examples/inspection/plot_permutation_importance_multicollinear.py b/examples/inspection/plot_permutation_importance_multicollinear.py
index e14916e808af9..59871c00946a6 100644
--- a/examples/inspection/plot_permutation_importance_multicollinear.py
+++ b/examples/inspection/plot_permutation_importance_multicollinear.py
@@ -22,9 +22,9 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
+from scipy.stats import spearmanr
 from scipy.cluster import hierarchy
 from scipy.spatial.distance import squareform
-from scipy.stats import spearmanr
 
 from sklearn.datasets import load_breast_cancer
 from sklearn.ensemble import RandomForestClassifier
diff --git a/examples/kernel_approximation/plot_scalable_poly_kernels.py b/examples/kernel_approximation/plot_scalable_poly_kernels.py
index 5e9126389a5f9..ade27e16e349a 100644
--- a/examples/kernel_approximation/plot_scalable_poly_kernels.py
+++ b/examples/kernel_approximation/plot_scalable_poly_kernels.py
@@ -64,8 +64,8 @@
 # the LIBSVM webpage, and then normalize to unit length as done in the
 # original Tensor Sketch paper [1].
 
-from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import MinMaxScaler, Normalizer
+from sklearn.pipeline import make_pipeline
 
 mm = make_pipeline(MinMaxScaler(), Normalizer())
 X_train = mm.fit_transform(X_train)
@@ -80,7 +80,6 @@
 # plot them later.
 import time
-
 from sklearn.svm import LinearSVC
 
 results = {}
diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py
index 79b49fb76ef9a..261fec8aeee3b 100644
--- a/examples/linear_model/plot_ard.py
+++ b/examples/linear_model/plot_ard.py
@@ -54,8 +54,7 @@
 # coefficients.
 import pandas as pd
-
-from sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression
+from sklearn.linear_model import ARDRegression, LinearRegression, BayesianRidge
 
 olr = LinearRegression().fit(X, y)
 brr = BayesianRidge(compute_score=True, n_iter=30).fit(X, y)
diff --git a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
index 8313b0b56922e..3bca3101758ff 100644
--- a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
+++ b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py
@@ -20,7 +20,6 @@
 # %%
 # Let's start by loading the dataset and creating some sample weights.
 import numpy as np
-
 from sklearn.datasets import make_regression
 
 rng = np.random.RandomState(0)
diff --git a/examples/linear_model/plot_huber_vs_ridge.py b/examples/linear_model/plot_huber_vs_ridge.py
index 7c0222b71a721..2ea5a190e35d8 100644
--- a/examples/linear_model/plot_huber_vs_ridge.py
+++ b/examples/linear_model/plot_huber_vs_ridge.py
@@ -16,8 +16,8 @@
 # Authors: Manoj Kumar mks542@nyu.edu
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 from sklearn.datasets import make_regression
 from sklearn.linear_model import HuberRegressor, Ridge
diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py
index 4d420c26977ed..10a1f0f15ad79 100644
--- a/examples/linear_model/plot_iris_logistic.py
+++ b/examples/linear_model/plot_iris_logistic.py
@@ -16,10 +16,9 @@
 # License: BSD 3 clause
 
 import matplotlib.pyplot as plt
-
+from sklearn.linear_model import LogisticRegression
 from sklearn import datasets
 from sklearn.inspection import DecisionBoundaryDisplay
-from sklearn.linear_model import LogisticRegression
 
 # import some data to play with
 iris = datasets.load_iris()
diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py
index a7804c974fd49..c167b0ce785e2 100644
--- a/examples/linear_model/plot_lasso_and_elasticnet.py
+++ b/examples/linear_model/plot_lasso_and_elasticnet.py
@@ -13,8 +13,8 @@
 # Data Generation
 # ---------------------------------------------------
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 from sklearn.metrics import r2_score
diff --git a/examples/linear_model/plot_lasso_coordinate_descent_path.py b/examples/linear_model/plot_lasso_coordinate_descent_path.py
index ee2f09f000d23..1796dc5011644 100644
--- a/examples/linear_model/plot_lasso_coordinate_descent_path.py
+++ b/examples/linear_model/plot_lasso_coordinate_descent_path.py
@@ -14,12 +14,12 @@
 # License: BSD 3 clause
 
 from itertools import cycle
-
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
+from sklearn.linear_model import lasso_path, enet_path
 from sklearn import datasets
-from sklearn.linear_model import enet_path, lasso_path
+
 
 X, y = datasets.load_diabetes(return_X_y=True)
diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
index a797d5d708160..8da1820c0b0c4 100644
--- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
+++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py
@@ -9,12 +9,13 @@
 
 """
 
 from time import time
-
-from scipy import linalg, sparse
+from scipy import sparse
+from scipy import linalg
 
 from sklearn.datasets import make_regression
 from sklearn.linear_model import Lasso
 
+
 # %%
 # Comparing the two Lasso implementations on Dense data
 # -----------------------------------------------------
diff --git a/examples/linear_model/plot_lasso_lars.py b/examples/linear_model/plot_lasso_lars.py
index 5444aeec90c65..6788b8b1d1598 100644
--- a/examples/linear_model/plot_lasso_lars.py
+++ b/examples/linear_model/plot_lasso_lars.py
@@ -14,10 +14,11 @@
 # Alexandre Gramfort
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
-from sklearn import datasets, linear_model
+from sklearn import linear_model
+from sklearn import datasets
 
 X, y = datasets.load_diabetes(return_X_y=True)
diff --git a/examples/linear_model/plot_lasso_lars_ic.py b/examples/linear_model/plot_lasso_lars_ic.py
index dc76f16fb8c82..4a09d28cdce9a 100644
--- a/examples/linear_model/plot_lasso_lars_ic.py
+++ b/examples/linear_model/plot_lasso_lars_ic.py
@@ -36,9 +36,6 @@
 n_samples = X.shape[0]
 X.head()
 
-from sklearn.linear_model import LassoLarsIC
-from sklearn.pipeline import make_pipeline
-
 # %%
 # Scikit-learn provides an estimator called
 # :class:`~sklearn.linear_model.LinearLarsIC` that uses either Akaike's
@@ -49,6 +46,8 @@
 # In the following, we are going to fit two models to compare the values
 # reported by AIC and BIC.
 from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LassoLarsIC
+from sklearn.pipeline import make_pipeline
 
 lasso_lars_ic = make_pipeline(
     StandardScaler(), LassoLarsIC(criterion="aic", normalize=False)
 )
diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py
index f7e9a2c35196f..bf2111e32b427 100644
--- a/examples/linear_model/plot_lasso_model_selection.py
+++ b/examples/linear_model/plot_lasso_model_selection.py
@@ -59,10 +59,9 @@
 #
 # We will first fit a Lasso model with the AIC criterion.
 import time
-
+from sklearn.preprocessing import StandardScaler
 from sklearn.linear_model import LassoLarsIC
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import StandardScaler
 
 start_time = time.time()
 lasso_lars_ic = make_pipeline(
diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py
index e82077771bcf1..f0f5dbf710714 100644
--- a/examples/linear_model/plot_logistic.py
+++ b/examples/linear_model/plot_logistic.py
@@ -13,11 +13,11 @@
 # Code source: Gael Varoquaux
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
-from scipy.special import expit
+import matplotlib.pyplot as plt
 
-from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.linear_model import LogisticRegression, LinearRegression
+from scipy.special import expit
 
 # Generate a toy dataset, it's just a straight line with some Gaussian noise:
 xmin, xmax = -5, 5
diff --git a/examples/linear_model/plot_logistic_l1_l2_sparsity.py b/examples/linear_model/plot_logistic_l1_l2_sparsity.py
index d5aa883b7b7b1..ce0afef012a2b 100644
--- a/examples/linear_model/plot_logistic_l1_l2_sparsity.py
+++ b/examples/linear_model/plot_logistic_l1_l2_sparsity.py
@@ -20,11 +20,11 @@
 # Andreas Mueller
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
-from sklearn import datasets
 from sklearn.linear_model import LogisticRegression
+from sklearn import datasets
 from sklearn.preprocessing import StandardScaler
 
 X, y = datasets.load_digits(return_X_y=True)
diff --git a/examples/linear_model/plot_logistic_multinomial.py b/examples/linear_model/plot_logistic_multinomial.py
index 791a788b2238b..814eeadaa68c4 100644
--- a/examples/linear_model/plot_logistic_multinomial.py
+++ b/examples/linear_model/plot_logistic_multinomial.py
@@ -12,12 +12,11 @@
 # Authors: Tom Dupre la Tour
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn.datasets import make_blobs
-from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.linear_model import LogisticRegression
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # make 3-class dataset for classification
 centers = [[-5, 0], [0, 1.5], [5, -1]]
diff --git a/examples/linear_model/plot_multi_task_lasso_support.py b/examples/linear_model/plot_multi_task_lasso_support.py
index f976698c4a30e..b53c78b986acd 100644
--- a/examples/linear_model/plot_multi_task_lasso_support.py
+++ b/examples/linear_model/plot_multi_task_lasso_support.py
@@ -19,7 +19,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 
-from sklearn.linear_model import Lasso, MultiTaskLasso
+from sklearn.linear_model import MultiTaskLasso, Lasso
 
 rng = np.random.RandomState(42)
diff --git a/examples/linear_model/plot_nnls.py b/examples/linear_model/plot_nnls.py
index 05a8550ec166b..c8ba2914d783a 100644
--- a/examples/linear_model/plot_nnls.py
+++ b/examples/linear_model/plot_nnls.py
@@ -9,9 +9,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn.metrics import r2_score
 
 # %%
diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index 657a0cddd4c7f..2d1930eab1b2a 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -20,7 +20,6 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-
 from sklearn import datasets, linear_model
 from sklearn.metrics import mean_squared_error, r2_score
diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py
index a027eac2ca707..222226c6b28c2 100644
--- a/examples/linear_model/plot_ols_3d.py
+++ b/examples/linear_model/plot_ols_3d.py
@@ -17,9 +17,8 @@
 # %%
 # First we load the diabetes dataset.
 
-import numpy as np
-
 from sklearn import datasets
+import numpy as np
 
 X, y = datasets.load_diabetes(return_X_y=True)
 indices = (0, 1)
diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py
index 78c1ae69dbbca..4c668f97786be 100644
--- a/examples/linear_model/plot_ols_ridge_variance.py
+++ b/examples/linear_model/plot_ols_ridge_variance.py
@@ -25,8 +25,8 @@
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 from sklearn import linear_model
diff --git a/examples/linear_model/plot_omp.py b/examples/linear_model/plot_omp.py
index 8bcf1383f2536..94567409b3841 100644
--- a/examples/linear_model/plot_omp.py
+++ b/examples/linear_model/plot_omp.py
@@ -10,9 +10,9 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-
+from sklearn.linear_model import OrthogonalMatchingPursuit
+from sklearn.linear_model import OrthogonalMatchingPursuitCV
 from sklearn.datasets import make_sparse_coded_signal
-from sklearn.linear_model import OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV
 
 n_components, n_features = 512, 100
 n_nonzero_coefs = 17
diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py
index 78fe9211fae08..5ef8f56980dea 100644
--- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py
+++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py
@@ -41,11 +41,10 @@
 # Olivier Grisel
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 import pandas as pd
 
-from sklearn.datasets import fetch_openml
 
 ##############################################################################
 # The French Motor Third-Party Liability Claims dataset
 # -----------------------------------------------------
 #
 # Let's load the motor claim dataset from OpenML:
 # https://www.openml.org/d/41214
 
+from sklearn.datasets import fetch_openml
+
 df = fetch_openml(data_id=41214, as_frame=True, parser="pandas").frame
 df
 
@@ -96,14 +97,11 @@
 # In order to fit linear models with those predictors it is therefore
 # necessary to perform standard feature transformations as follows:
 
-from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import (
-    FunctionTransformer,
-    KBinsDiscretizer,
-    OneHotEncoder,
-    StandardScaler,
-)
+from sklearn.preprocessing import FunctionTransformer, OneHotEncoder
+from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
+from sklearn.compose import ColumnTransformer
+
 
 log_scale_transformer = make_pipeline(
     FunctionTransformer(np.log, validate=False), StandardScaler()
@@ -137,8 +135,8 @@
 # the training sample.
 
 from sklearn.dummy import DummyRegressor
-from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
+from sklearn.model_selection import train_test_split
 
 df_train, df_test = train_test_split(df, test_size=0.33, random_state=0)
 
@@ -154,11 +152,9 @@
 # Let's compute the performance of this constant prediction baseline with 3
 # different regression metrics:
 
-from sklearn.metrics import (
-    mean_absolute_error,
-    mean_poisson_deviance,
-    mean_squared_error,
-)
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import mean_absolute_error
+from sklearn.metrics import mean_poisson_deviance
 
 
 def score_estimator(estimator, df_test):
@@ -213,6 +209,7 @@ def score_estimator(estimator, df_test):
 
 from sklearn.linear_model import Ridge
 
+
 ridge_glm = Pipeline(
     [
         ("preprocessor", linear_model_preprocessor),
@@ -284,6 +281,7 @@ def score_estimator(estimator, df_test):
 
 from sklearn.ensemble import HistGradientBoostingRegressor
 from sklearn.preprocessing import OrdinalEncoder
 
+
 tree_preprocessor = ColumnTransformer(
     [
         (
diff --git a/examples/linear_model/plot_polynomial_interpolation.py b/examples/linear_model/plot_polynomial_interpolation.py
index f648b7aea762d..ac2fe28de870d 100644
--- a/examples/linear_model/plot_polynomial_interpolation.py
+++ b/examples/linear_model/plot_polynomial_interpolation.py
@@ -42,12 +42,13 @@
 # Malte Londschien
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 from sklearn.linear_model import Ridge
-from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import PolynomialFeatures, SplineTransformer
+from sklearn.pipeline import make_pipeline
+
 
 # %%
 # We start by defining a function that we intend to approximate and prepare
diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py
index b623e116f5c31..b4e1534efc54f 100644
--- a/examples/linear_model/plot_quantile_regression.py
+++ b/examples/linear_model/plot_quantile_regression.py
@@ -246,7 +246,8 @@
 # distributed target to make it more interesting as mean and median are not
 # equal.
 from sklearn.linear_model import LinearRegression
-from sklearn.metrics import mean_absolute_error, mean_squared_error
+from sklearn.metrics import mean_absolute_error
+from sklearn.metrics import mean_squared_error
 
 linear_regression = LinearRegression()
 quantile_regression = QuantileRegressor(quantile=0.5, alpha=0)
diff --git a/examples/linear_model/plot_ransac.py b/examples/linear_model/plot_ransac.py
index bb3336dc1e364..81670061a6609 100644
--- a/examples/linear_model/plot_ransac.py
+++ b/examples/linear_model/plot_ransac.py
@@ -11,7 +11,8 @@
 import numpy as np
 from matplotlib import pyplot as plt
 
-from sklearn import datasets, linear_model
+from sklearn import linear_model, datasets
+
 
 n_samples = 1000
 n_outliers = 50
diff --git a/examples/linear_model/plot_ridge_path.py b/examples/linear_model/plot_ridge_path.py
index 01f9d45a63f8d..66f8fd9eb6c23 100644
--- a/examples/linear_model/plot_ridge_path.py
+++ b/examples/linear_model/plot_ridge_path.py
@@ -30,9 +30,8 @@
 # Author: Fabian Pedregosa --
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn import linear_model
 
 # X is the 10x10 Hilbert matrix
diff --git a/examples/linear_model/plot_robust_fit.py b/examples/linear_model/plot_robust_fit.py
index 79213c9a8e83e..c9fe49fc0d416 100644
--- a/examples/linear_model/plot_robust_fit.py
+++ b/examples/linear_model/plot_robust_fit.py
@@ -30,18 +30,18 @@
 
 """
 
-import numpy as np
 from matplotlib import pyplot as plt
+import numpy as np
 
 from sklearn.linear_model import (
-    HuberRegressor,
     LinearRegression,
-    RANSACRegressor,
     TheilSenRegressor,
+    RANSACRegressor,
+    HuberRegressor,
 )
 from sklearn.metrics import mean_squared_error
-from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import PolynomialFeatures
+from sklearn.pipeline import make_pipeline
 
 np.random.seed(42)
diff --git a/examples/linear_model/plot_sgd_comparison.py b/examples/linear_model/plot_sgd_comparison.py
index 0477e42cf5947..5ab0d6b1b2827 100644
--- a/examples/linear_model/plot_sgd_comparison.py
+++ b/examples/linear_model/plot_sgd_comparison.py
@@ -9,17 +9,14 @@
 # Author: Rob Zinkov
 # License: BSD 3 clause
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn import datasets
-from sklearn.linear_model import (
-    LogisticRegression,
-    PassiveAggressiveClassifier,
-    Perceptron,
-    SGDClassifier,
-)
+
 from sklearn.model_selection import train_test_split
+from sklearn.linear_model import SGDClassifier, Perceptron
+from sklearn.linear_model import PassiveAggressiveClassifier
+from sklearn.linear_model import LogisticRegression
 
 heldout = [0.95, 0.90, 0.75, 0.50, 0.01]
 # Number of rounds to fit and evaluate an estimator.
diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py
index ceff4ba8ee056..123180ac62a9b 100644
--- a/examples/linear_model/plot_sgd_early_stopping.py
+++ b/examples/linear_model/plot_sgd_early_stopping.py
@@ -41,19 +41,19 @@
 #
 # License: BSD 3 clause
 
-import sys
 import time
+import sys
 
-import matplotlib.pyplot as plt
-import numpy as np
 import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
 
 from sklearn import linear_model
 from sklearn.datasets import fetch_openml
-from sklearn.exceptions import ConvergenceWarning
 from sklearn.model_selection import train_test_split
-from sklearn.utils import shuffle
 from sklearn.utils._testing import ignore_warnings
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils import shuffle
 
 
 def load_mnist(n_samples=None, class_0="0", class_1="8"):
diff --git a/examples/linear_model/plot_sgd_iris.py b/examples/linear_model/plot_sgd_iris.py
index 5d9b923f9b444..64dca07396d54 100644
--- a/examples/linear_model/plot_sgd_iris.py
+++ b/examples/linear_model/plot_sgd_iris.py
@@ -9,12 +9,11 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn import datasets
-from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.linear_model import SGDClassifier
+from sklearn.inspection import DecisionBoundaryDisplay
 
 # import some data to play with
 iris = datasets.load_iris()
diff --git a/examples/linear_model/plot_sgd_loss_functions.py b/examples/linear_model/plot_sgd_loss_functions.py
index 140562184b946..a1f74dca4d6af 100644
--- a/examples/linear_model/plot_sgd_loss_functions.py
+++ b/examples/linear_model/plot_sgd_loss_functions.py
@@ -8,8 +8,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 
 def modified_huber_loss(y_true, y_pred):
diff --git a/examples/linear_model/plot_sgd_penalties.py b/examples/linear_model/plot_sgd_penalties.py
index ff71dba5f20a3..0413751fb41a9 100644
--- a/examples/linear_model/plot_sgd_penalties.py
+++ b/examples/linear_model/plot_sgd_penalties.py
@@ -11,8 +11,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 
 l1_color = "navy"
 l2_color = "c"
diff --git a/examples/linear_model/plot_sgd_separating_hyperplane.py b/examples/linear_model/plot_sgd_separating_hyperplane.py
index e84ab7c519ae9..af288fcd3dde0 100644
--- a/examples/linear_model/plot_sgd_separating_hyperplane.py
+++ b/examples/linear_model/plot_sgd_separating_hyperplane.py
@@ -9,11 +9,10 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
-from sklearn.datasets import make_blobs
+import matplotlib.pyplot as plt
 from sklearn.linear_model import SGDClassifier
+from sklearn.datasets import make_blobs
 
 # we create 50 separable points
 X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
diff --git a/examples/linear_model/plot_sgd_weighted_samples.py b/examples/linear_model/plot_sgd_weighted_samples.py
index 4d605e99b4e49..2db52042b075f 100644
--- a/examples/linear_model/plot_sgd_weighted_samples.py
+++ b/examples/linear_model/plot_sgd_weighted_samples.py
@@ -8,9 +8,8 @@
 
 """
 
-import matplotlib.pyplot as plt
 import numpy as np
-
+import matplotlib.pyplot as plt
 from sklearn import linear_model
 
 # we create 20 points
diff --git a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py
index 2f03768f50532..c25f4a84d91e0 100644
--- a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py
+++ b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py
@@ -19,14 +19,13 @@
 
 """  # noqa: E501
 
-import matplotlib
-import matplotlib.pyplot as plt
 import numpy as np
-
-from sklearn.kernel_approximation import Nystroem
+import matplotlib.pyplot as plt
+import matplotlib
+from sklearn.svm import OneClassSVM
 from sklearn.linear_model import SGDOneClassSVM
+from sklearn.kernel_approximation import Nystroem
 from sklearn.pipeline import make_pipeline
-from sklearn.svm import OneClassSVM
 
 font = {"weight": "normal", "size": 15}
diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
index f62208aab154a..507dda5c76901 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py
@@ -29,9 +29,9 @@
 import numpy as np
 
 from sklearn.datasets import fetch_20newsgroups_vectorized
-from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from sklearn.exceptions import ConvergenceWarning
 
 warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
 t0 = timeit.default_timer()
diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
index e6746b8fb0896..37327aeaa4cb7 100644
--- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py
+++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py
@@ -21,7 +21,6 @@
 # License: BSD 3 clause
 import time
-
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/examples/linear_model/plot_theilsen.py b/examples/linear_model/plot_theilsen.py
index eb0ac4966841d..b380baf705a76 100644
--- a/examples/linear_model/plot_theilsen.py
+++ b/examples/linear_model/plot_theilsen.py
@@ -39,11 +39,10 @@
 # License: BSD 3 clause
 
 import time
-
-import matplotlib.pyplot as plt
 import numpy as np
-
-from sklearn.linear_model import LinearRegression, RANSACRegressor, TheilSenRegressor
+import matplotlib.pyplot as plt
+from sklearn.linear_model import LinearRegression, TheilSenRegressor
+from sklearn.linear_model import RANSACRegressor
 
 estimators = [
     ("OLS", LinearRegression()),
diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
index 0af020890264e..3d86903fcdeff 100644
--- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py
+++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
@@ -46,16 +46,14 @@
 
 from functools import partial
 
-import matplotlib.pyplot as plt
 import numpy as np
+import matplotlib.pyplot as plt
 import pandas as pd
 
 from sklearn.datasets import fetch_openml
-from sklearn.metrics import (
-    mean_absolute_error,
-    mean_squared_error,
-    mean_tweedie_deviance,
-)
+from sklearn.metrics import mean_tweedie_deviance
+from sklearn.metrics import mean_absolute_error
+from sklearn.metrics import mean_squared_error
 
 
 def load_mtpl2(n_samples=100000):
@@ -203,8 +201,6 @@ def score_estimator(
 
     return res
 
-from sklearn.compose import ColumnTransformer
-
 # %%
 # Loading datasets, basic feature extraction and target definitions
 # -----------------------------------------------------------------
 # containing the claim amount (``ClaimAmount``) for the same policy ids
 # (``IDpol``).
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import (
-    FunctionTransformer,
-    KBinsDiscretizer,
-    OneHotEncoder,
-    StandardScaler,
-)
+from sklearn.preprocessing import FunctionTransformer, OneHotEncoder
+from sklearn.preprocessing import StandardScaler, KBinsDiscretizer
+from sklearn.compose import ColumnTransformer
+
 
 df = load_mtpl2(n_samples=60000)
 
@@ -265,8 +259,6 @@ def score_estimator(
 with pd.option_context("display.max_columns", 15):
     print(df[df.ClaimAmount > 0].head())
 
-from sklearn.linear_model import PoissonRegressor
-
 # %%
 #
 # Frequency model -- Poisson distribution
@@ -279,6 +271,8 @@ def score_estimator(
 # Here we model the frequency ``y = ClaimNb / Exposure``, which is still a
 # (scaled) Poisson distribution, and use ``Exposure`` as `sample_weight`.
 from sklearn.model_selection import train_test_split
+from sklearn.linear_model import PoissonRegressor
+
 
 df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0)
 
@@ -376,6 +370,7 @@ def score_estimator(
 # more than one claim.
 
 from sklearn.linear_model import GammaRegressor
 
+
 mask_train = df_train["ClaimAmount"] > 0
 mask_test = df_test["ClaimAmount"] > 0
@@ -485,6 +480,7 @@ def score_estimator(
 # regardless of `power`.
 
 from sklearn.linear_model import TweedieRegressor
 
+
 glm_pure_premium = TweedieRegressor(power=1.9, alpha=0.1, max_iter=10000)
 glm_pure_premium.fit(
     X_train, df_train["PurePremium"], sample_weight=df_train["Exposure"]
diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py
index e10331e7adc4c..310a476290841 100644
--- a/examples/manifold/plot_compare_methods.py
+++ b/examples/manifold/plot_compare_methods.py
@@ -28,14 +28,14 @@
 #
 # We start by generating the S-curve dataset.
 
+from numpy.random import RandomState
 import matplotlib.pyplot as plt
+from matplotlib import ticker
 
 # unused but required import for doing 3d projections with matplotlib < 3.2
 import mpl_toolkits.mplot3d  # noqa: F401
-from matplotlib import ticker
-from numpy.random import RandomState
 
-from sklearn import datasets, manifold
+from sklearn import manifold, datasets
 
 rng = RandomState(0)
diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py
index 99520fb281132..bafee1b7a7eb0 100644
--- a/examples/manifold/plot_lle_digits.py
+++ b/examples/manifold/plot_lle_digits.py
@@ -45,7 +45,6 @@
 # scattered across it.
 import numpy as np
 from matplotlib import offsetbox
-
 from sklearn.preprocessing import MinMaxScaler
@@ -104,11 +103,11 @@ def plot_embedding(X, title):
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.ensemble import RandomTreesEmbedding
 from sklearn.manifold import (
-    MDS,
-    TSNE,
     Isomap,
     LocallyLinearEmbedding,
+    MDS,
     SpectralEmbedding,
+    TSNE,
 )
 from sklearn.neighbors import NeighborhoodComponentsAnalysis
 from sklearn.pipeline import make_pipeline
diff --git a/examples/manifold/plot_manifold_sphere.py b/examples/manifold/plot_manifold_sphere.py
index dd16ddbf86526..b68400e0b7e66 100644
--- a/examples/manifold/plot_manifold_sphere.py
+++ b/examples/manifold/plot_manifold_sphere.py
@@ -29,19 +29,17 @@
 # Author: Jaques Grobler
 # License: BSD 3 clause
 
-import warnings
 from time import time
-
-import matplotlib.pyplot as plt
-
-# Unused but required import for doing 3d projections with matplotlib < 3.2
-import mpl_toolkits.mplot3d  # noqa: F401
 import numpy as np
+import matplotlib.pyplot as plt
 from matplotlib.ticker import NullFormatter
-
 from sklearn import manifold
 from sklearn.utils import check_random_state
 
+# Unused but required import for doing 3d projections with matplotlib < 3.2
+import mpl_toolkits.mplot3d  # noqa: F401
+import warnings
+
 
 # Variables for manifold learning.
 n_neighbors = 10
 n_samples = 1000
diff --git a/examples/manifold/plot_mds.py b/examples/manifold/plot_mds.py
index 87db0f5ad3a50..4a90268eba902 100644
--- a/examples/manifold/plot_mds.py
+++ b/examples/manifold/plot_mds.py
@@ -14,12 +14,13 @@
 # License: BSD
 
 import numpy as np
+
 from matplotlib import pyplot as plt
 from matplotlib.collections import LineCollection
 
 from sklearn import manifold
-from sklearn.decomposition import PCA
 from sklearn.metrics import euclidean_distances
+from sklearn.decomposition import PCA
 
 EPSILON = np.finfo(np.float32).eps
 n_samples = 20
diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py
index c4dfa0a0402af..4bad9afd43ab6 100644
--- a/examples/manifold/plot_swissroll.py
+++ b/examples/manifold/plot_swissroll.py
@@ -15,8 +15,8 @@
 # We start by generating the Swiss Roll dataset.
import matplotlib.pyplot as plt +from sklearn import manifold, datasets -from sklearn import datasets, manifold sr_points, sr_color = datasets.make_swiss_roll(n_samples=1500, random_state=0) diff --git a/examples/manifold/plot_t_sne_perplexity.py b/examples/manifold/plot_t_sne_perplexity.py index 65021c4ea6fcf..20fd00a835620 100644 --- a/examples/manifold/plot_t_sne_perplexity.py +++ b/examples/manifold/plot_t_sne_perplexity.py @@ -27,13 +27,12 @@ # Author: Narine Kokhlikyan # License: BSD -from time import time - -import matplotlib.pyplot as plt import numpy as np -from matplotlib.ticker import NullFormatter +import matplotlib.pyplot as plt -from sklearn import datasets, manifold +from matplotlib.ticker import NullFormatter +from sklearn import manifold, datasets +from time import time n_samples = 150 n_components = 2 diff --git a/examples/miscellaneous/plot_anomaly_comparison.py b/examples/miscellaneous/plot_anomaly_comparison.py index 7d5bc7d59bfa1..efb4f6d86edfc 100644 --- a/examples/miscellaneous/plot_anomaly_comparison.py +++ b/examples/miscellaneous/plot_anomaly_comparison.py @@ -68,17 +68,17 @@ import time +import numpy as np import matplotlib import matplotlib.pyplot as plt -import numpy as np from sklearn import svm +from sklearn.datasets import make_moons, make_blobs from sklearn.covariance import EllipticEnvelope -from sklearn.datasets import make_blobs, make_moons from sklearn.ensemble import IsolationForest -from sklearn.kernel_approximation import Nystroem -from sklearn.linear_model import SGDOneClassSVM from sklearn.neighbors import LocalOutlierFactor +from sklearn.linear_model import SGDOneClassSVM +from sklearn.kernel_approximation import Nystroem from sklearn.pipeline import make_pipeline matplotlib.rcParams["contour.negative_linestyle"] = "solid" diff --git a/examples/miscellaneous/plot_changed_only_pprint_parameter.py b/examples/miscellaneous/plot_changed_only_pprint_parameter.py index aef6d28402a2b..c213c4b5d98af 100644 --- a/examples/miscellaneous/plot_changed_only_pprint_parameter.py +++ b/examples/miscellaneous/plot_changed_only_pprint_parameter.py @@ -11,8 +11,9 @@ """ -from sklearn import set_config from sklearn.linear_model import LogisticRegression +from sklearn import set_config + lr = LogisticRegression(penalty="l1") print("Default representation:") diff --git a/examples/miscellaneous/plot_display_object_visualization.py b/examples/miscellaneous/plot_display_object_visualization.py index 24095de3b5cae..f108beced7a00 100644 --- a/examples/miscellaneous/plot_display_object_visualization.py +++ b/examples/miscellaneous/plot_display_object_visualization.py @@ -24,10 +24,10 @@ # data is split into a train and test dataset and a logistic regression is # fitted with the train dataset. from sklearn.datasets import fetch_openml +from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import make_pipeline from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler X, y = fetch_openml(data_id=1464, return_X_y=True, parser="pandas") X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y) @@ -41,7 +41,8 @@ # With the fitted model, we compute the predictions of the model on the test # dataset. 
These predictions are used to compute the confusion matrix which # is plotted with the :class:`ConfusionMatrixDisplay` -from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix +from sklearn.metrics import confusion_matrix +from sklearn.metrics import ConfusionMatrixDisplay y_pred = clf.predict(X_test) cm = confusion_matrix(y_test, y_pred) @@ -55,7 +56,8 @@ # The roc curve requires either the probabilities or the non-thresholded # decision values from the estimator. Since the logistic regression provides # a decision function, we will use it to plot the roc curve: -from sklearn.metrics import RocCurveDisplay, roc_curve +from sklearn.metrics import roc_curve +from sklearn.metrics import RocCurveDisplay y_score = clf.decision_function(X_test) @@ -67,7 +69,8 @@ ############################################################################## # Similarly, the precision recall curve can be plotted using `y_score` from # the previous sections. -from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import PrecisionRecallDisplay prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=clf.classes_[1]) pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot() diff --git a/examples/miscellaneous/plot_isotonic_regression.py b/examples/miscellaneous/plot_isotonic_regression.py index a1c1174c9e9de..0240a8dec34b5 100644 --- a/examples/miscellaneous/plot_isotonic_regression.py +++ b/examples/miscellaneous/plot_isotonic_regression.py @@ -23,12 +23,12 @@ # Alexandre Gramfort # License: BSD -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib.collections import LineCollection -from sklearn.isotonic import IsotonicRegression from sklearn.linear_model import LinearRegression +from sklearn.isotonic import IsotonicRegression from sklearn.utils import check_random_state n = 100 diff --git a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py index 85161a6ee51bb..6fd9d3614804c 100644 --- a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py +++ b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py @@ -15,16 +15,13 @@ import sys from time import time - -import matplotlib.pyplot as plt import numpy as np - -from sklearn.datasets import fetch_20newsgroups_vectorized, load_digits +import matplotlib.pyplot as plt +from sklearn.random_projection import johnson_lindenstrauss_min_dim +from sklearn.random_projection import SparseRandomProjection +from sklearn.datasets import fetch_20newsgroups_vectorized +from sklearn.datasets import load_digits from sklearn.metrics.pairwise import euclidean_distances -from sklearn.random_projection import ( - SparseRandomProjection, - johnson_lindenstrauss_min_dim, -) # %% # Theoretical bounds diff --git a/examples/miscellaneous/plot_kernel_approximation.py b/examples/miscellaneous/plot_kernel_approximation.py index 372b8f9a37197..7dfc1e31220e8 100644 --- a/examples/miscellaneous/plot_kernel_approximation.py +++ b/examples/miscellaneous/plot_kernel_approximation.py @@ -38,16 +38,15 @@ # Andreas Mueller # License: BSD 3 clause -from time import time - # Standard scientific Python imports import matplotlib.pyplot as plt import numpy as np +from time import time # Import datasets, classifiers and performance metrics -from sklearn import datasets, pipeline, svm +from sklearn import datasets, svm, pipeline +from 
sklearn.kernel_approximation import RBFSampler, Nystroem from sklearn.decomposition import PCA -from sklearn.kernel_approximation import Nystroem, RBFSampler # The digits dataset digits = datasets.load_digits(n_class=9) diff --git a/examples/miscellaneous/plot_kernel_ridge_regression.py b/examples/miscellaneous/plot_kernel_ridge_regression.py index 1882a838e5ef1..dd696443d6b31 100644 --- a/examples/miscellaneous/plot_kernel_ridge_regression.py +++ b/examples/miscellaneous/plot_kernel_ridge_regression.py @@ -40,9 +40,9 @@ # Construct the kernel-based regression models # -------------------------------------------- -from sklearn.kernel_ridge import KernelRidge from sklearn.model_selection import GridSearchCV from sklearn.svm import SVR +from sklearn.kernel_ridge import KernelRidge train_size = 100 diff --git a/examples/miscellaneous/plot_multilabel.py b/examples/miscellaneous/plot_multilabel.py index b424c3253104a..aded595258fea 100644 --- a/examples/miscellaneous/plot_multilabel.py +++ b/examples/miscellaneous/plot_multilabel.py @@ -32,14 +32,14 @@ # Authors: Vlad Niculae, Mathieu Blondel # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn.cross_decomposition import CCA from sklearn.datasets import make_multilabel_classification -from sklearn.decomposition import PCA from sklearn.multiclass import OneVsRestClassifier from sklearn.svm import SVC +from sklearn.decomposition import PCA +from sklearn.cross_decomposition import CCA def plot_hyperplane(clf, min_x, max_x, linestyle, label): diff --git a/examples/miscellaneous/plot_multioutput_face_completion.py b/examples/miscellaneous/plot_multioutput_face_completion.py index 62070bc05e488..31e73195747a5 100644 --- a/examples/miscellaneous/plot_multioutput_face_completion.py +++ b/examples/miscellaneous/plot_multioutput_face_completion.py @@ -12,14 +12,16 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.datasets import fetch_olivetti_faces +from sklearn.utils.validation import check_random_state + from sklearn.ensemble import ExtraTreesRegressor -from sklearn.linear_model import LinearRegression, RidgeCV from sklearn.neighbors import KNeighborsRegressor -from sklearn.utils.validation import check_random_state +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import RidgeCV # Load the faces datasets data, targets = fetch_olivetti_faces(return_X_y=True) diff --git a/examples/miscellaneous/plot_outlier_detection_bench.py b/examples/miscellaneous/plot_outlier_detection_bench.py index 0e2a158b43df0..f2d0b922710ca 100644 --- a/examples/miscellaneous/plot_outlier_detection_bench.py +++ b/examples/miscellaneous/plot_outlier_detection_bench.py @@ -32,10 +32,9 @@ # The `preprocess_dataset` function returns data and target. import numpy as np -import pandas as pd - -from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml +from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml from sklearn.preprocessing import LabelBinarizer +import pandas as pd rng = np.random.RandomState(42) @@ -119,8 +118,8 @@ def preprocess_dataset(dataset_name): # `compute_prediction` function returns average outlier score of X. 
-from sklearn.ensemble import IsolationForest from sklearn.neighbors import LocalOutlierFactor +from sklearn.ensemble import IsolationForest def compute_prediction(X, model_name): @@ -148,9 +147,7 @@ def compute_prediction(X, model_name): import math - import matplotlib.pyplot as plt - from sklearn.metrics import RocCurveDisplay datasets_name = [ diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py index 19873ad7e2af2..604ead891877c 100644 --- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py +++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py @@ -14,15 +14,15 @@ """ # noqa: E501 -import matplotlib.pyplot as plt import pandas as pd - +import matplotlib.pyplot as plt from sklearn.datasets import load_diabetes -from sklearn.inspection import PartialDependenceDisplay from sklearn.neural_network import MLPRegressor -from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +from sklearn.pipeline import make_pipeline from sklearn.tree import DecisionTreeRegressor +from sklearn.inspection import PartialDependenceDisplay + # %% # Train models on the diabetes dataset diff --git a/examples/miscellaneous/plot_pipeline_display.py b/examples/miscellaneous/plot_pipeline_display.py index 9642bb56b903f..f0fea8d2f3a27 100755 --- a/examples/miscellaneous/plot_pipeline_display.py +++ b/examples/miscellaneous/plot_pipeline_display.py @@ -19,10 +19,10 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. -from sklearn import set_config -from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler +from sklearn.linear_model import LogisticRegression +from sklearn import set_config steps = [ ("preprocessing", StandardScaler()), @@ -53,9 +53,9 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. -from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline -from sklearn.preprocessing import PolynomialFeatures, StandardScaler +from sklearn.preprocessing import StandardScaler, PolynomialFeatures +from sklearn.linear_model import LogisticRegression steps = [ ("standard_scaler", StandardScaler()), @@ -73,9 +73,9 @@ # a classifier, :class:`~sklearn.svm.SVC`, and displays its visual # representation. -from sklearn.decomposition import PCA from sklearn.pipeline import Pipeline from sklearn.svm import SVC +from sklearn.decomposition import PCA steps = [("reduce_dim", PCA(n_components=4)), ("classifier", SVC(kernel="linear"))] pipe = Pipeline(steps) @@ -90,12 +90,12 @@ # representation. import numpy as np - -from sklearn.compose import ColumnTransformer +from sklearn.pipeline import make_pipeline +from sklearn.pipeline import Pipeline from sklearn.impute import SimpleImputer -from sklearn.linear_model import LogisticRegression -from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.linear_model import LogisticRegression numeric_preprocessor = Pipeline( steps=[ @@ -133,13 +133,13 @@ # representation. 
import numpy as np - +from sklearn.pipeline import make_pipeline +from sklearn.pipeline import Pipeline +from sklearn.impute import SimpleImputer from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder, StandardScaler from sklearn.ensemble import RandomForestClassifier -from sklearn.impute import SimpleImputer from sklearn.model_selection import GridSearchCV -from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.preprocessing import OneHotEncoder, StandardScaler numeric_preprocessor = Pipeline( steps=[ diff --git a/examples/miscellaneous/plot_roc_curve_visualization_api.py b/examples/miscellaneous/plot_roc_curve_visualization_api.py index 7fc8df9724337..b4e08493c77d4 100644 --- a/examples/miscellaneous/plot_roc_curve_visualization_api.py +++ b/examples/miscellaneous/plot_roc_curve_visualization_api.py @@ -15,12 +15,11 @@ # First, we load the wine dataset and convert it to a binary classification # problem. Then, we train a support vector classifier on a training dataset. import matplotlib.pyplot as plt - -from sklearn.datasets import load_wine +from sklearn.svm import SVC from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import RocCurveDisplay +from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split -from sklearn.svm import SVC X, y = load_wine(return_X_y=True) y = y == 2 diff --git a/examples/mixture/plot_concentration_prior.py b/examples/mixture/plot_concentration_prior.py index f32e08ac6a26d..b143cfed10318 100644 --- a/examples/mixture/plot_concentration_prior.py +++ b/examples/mixture/plot_concentration_prior.py @@ -32,10 +32,10 @@ # Author: Thierry Guillemot # License: BSD 3 clause +import numpy as np import matplotlib as mpl -import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt -import numpy as np +import matplotlib.gridspec as gridspec from sklearn.mixture import BayesianGaussianMixture diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py index d9481cc78758d..675aa341696ac 100644 --- a/examples/mixture/plot_gmm.py +++ b/examples/mixture/plot_gmm.py @@ -26,10 +26,10 @@ import itertools -import matplotlib as mpl -import matplotlib.pyplot as plt import numpy as np from scipy import linalg +import matplotlib.pyplot as plt +import matplotlib as mpl from sklearn import mixture diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index cd12324f00c08..95b5d2c1ba90f 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -33,6 +33,7 @@ import matplotlib as mpl import matplotlib.pyplot as plt + import numpy as np from sklearn import datasets diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py index df9be5189a996..23a4788b799b4 100644 --- a/examples/mixture/plot_gmm_init.py +++ b/examples/mixture/plot_gmm_init.py @@ -37,14 +37,12 @@ # Author: Gordon Walsh # Data generation code from Jake Vanderplas -from timeit import default_timer as timer - import matplotlib.pyplot as plt import numpy as np - -from sklearn.datasets._samples_generator import make_blobs from sklearn.mixture import GaussianMixture from sklearn.utils.extmath import row_norms +from sklearn.datasets._samples_generator import make_blobs +from timeit import default_timer as timer print(__doc__) diff --git a/examples/mixture/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py index 062bdfd4d6d67..70d58f22f8f41 100644 --- a/examples/mixture/plot_gmm_pdf.py +++ 
b/examples/mixture/plot_gmm_pdf.py @@ -9,10 +9,9 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib.colors import LogNorm - from sklearn import mixture n_samples = 300 diff --git a/examples/mixture/plot_gmm_selection.py b/examples/mixture/plot_gmm_selection.py index acfbaf7ed04ff..82175091ee049 100644 --- a/examples/mixture/plot_gmm_selection.py +++ b/examples/mixture/plot_gmm_selection.py @@ -16,12 +16,12 @@ """ +import numpy as np import itertools -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np from scipy import linalg +import matplotlib.pyplot as plt +import matplotlib as mpl from sklearn import mixture diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py index 3d663e195ceb5..76f0d30e4e9d8 100644 --- a/examples/mixture/plot_gmm_sin.py +++ b/examples/mixture/plot_gmm_sin.py @@ -41,10 +41,10 @@ import itertools -import matplotlib as mpl -import matplotlib.pyplot as plt import numpy as np from scipy import linalg +import matplotlib.pyplot as plt +import matplotlib as mpl from sklearn import mixture diff --git a/examples/model_selection/grid_search_text_feature_extraction.py b/examples/model_selection/grid_search_text_feature_extraction.py index 2440c7dce5427..91801b361265b 100644 --- a/examples/model_selection/grid_search_text_feature_extraction.py +++ b/examples/model_selection/grid_search_text_feature_extraction.py @@ -50,12 +50,13 @@ # Data loading # ------------ -import logging from pprint import pprint from time import time +import logging from sklearn.datasets import fetch_20newsgroups -from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.feature_extraction.text import TfidfTransformer from sklearn.linear_model import SGDClassifier from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index 278083a994e58..b891564db4025 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -24,12 +24,12 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn import datasets, svm -from sklearn.metrics import ConfusionMatrixDisplay +from sklearn import svm, datasets from sklearn.model_selection import train_test_split +from sklearn.metrics import ConfusionMatrixDisplay # import some data to play with iris = datasets.load_iris() diff --git a/examples/model_selection/plot_cv_indices.py b/examples/model_selection/plot_cv_indices.py index e6c3580c787f0..8b70191e4abd1 100644 --- a/examples/model_selection/plot_cv_indices.py +++ b/examples/model_selection/plot_cv_indices.py @@ -12,20 +12,19 @@ """ -import matplotlib.pyplot as plt -import numpy as np -from matplotlib.patches import Patch - from sklearn.model_selection import ( - GroupKFold, - GroupShuffleSplit, + TimeSeriesSplit, KFold, ShuffleSplit, - StratifiedGroupKFold, StratifiedKFold, + GroupShuffleSplit, + GroupKFold, StratifiedShuffleSplit, - TimeSeriesSplit, + StratifiedGroupKFold, ) +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.patches import Patch rng = np.random.RandomState(1338) cmap_data = plt.cm.Paired diff --git a/examples/model_selection/plot_cv_predict.py b/examples/model_selection/plot_cv_predict.py index ca0726c30d534..82ef0b8b81ae6 100644 --- 
a/examples/model_selection/plot_cv_predict.py +++ b/examples/model_selection/plot_cv_predict.py @@ -9,10 +9,10 @@ """ -import matplotlib.pyplot as plt - -from sklearn import datasets, linear_model +from sklearn import datasets from sklearn.model_selection import cross_val_predict +from sklearn import linear_model +import matplotlib.pyplot as plt lr = linear_model.LinearRegression() X, y = datasets.load_diabetes(return_X_y=True) diff --git a/examples/model_selection/plot_grid_search_digits.py b/examples/model_selection/plot_grid_search_digits.py index 77a2b5c92de33..2aaa64043749b 100644 --- a/examples/model_selection/plot_grid_search_digits.py +++ b/examples/model_selection/plot_grid_search_digits.py @@ -17,8 +17,9 @@ """ from sklearn import datasets +from sklearn.model_selection import train_test_split +from sklearn.model_selection import GridSearchCV from sklearn.metrics import classification_report -from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.svm import SVC # Loading the Digits dataset diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py index a851ee5f9bb19..53513aa4ba1ec 100644 --- a/examples/model_selection/plot_grid_search_refit_callable.py +++ b/examples/model_selection/plot_grid_search_refit_callable.py @@ -20,8 +20,8 @@ # Author: Wenhao Zhang -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.datasets import load_digits from sklearn.decomposition import PCA diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py index fbeb485d8db44..179d860b42128 100644 --- a/examples/model_selection/plot_grid_search_stats.py +++ b/examples/model_selection/plot_grid_search_stats.py @@ -16,7 +16,6 @@ import matplotlib.pyplot as plt import seaborn as sns - from sklearn.datasets import make_moons X, y = make_moons(noise=0.352, random_state=1, n_samples=100) diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index dab4bf04dca55..25f43d8b8a3e4 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -17,13 +17,13 @@ """ -import matplotlib.pyplot as plt import numpy as np - -from sklearn.datasets import load_digits -from sklearn.model_selection import ShuffleSplit, learning_curve +import matplotlib.pyplot as plt from sklearn.naive_bayes import GaussianNB from sklearn.svm import SVC +from sklearn.datasets import load_digits +from sklearn.model_selection import learning_curve +from sklearn.model_selection import ShuffleSplit def plot_learning_curve( diff --git a/examples/model_selection/plot_multi_metric_evaluation.py b/examples/model_selection/plot_multi_metric_evaluation.py index 674bf8bc1b07c..e47e67e086ccb 100644 --- a/examples/model_selection/plot_multi_metric_evaluation.py +++ b/examples/model_selection/plot_multi_metric_evaluation.py @@ -23,8 +23,9 @@ from matplotlib import pyplot as plt from sklearn.datasets import make_hastie_10_2 -from sklearn.metrics import accuracy_score, make_scorer from sklearn.model_selection import GridSearchCV +from sklearn.metrics import make_scorer +from sklearn.metrics import accuracy_score from sklearn.tree import DecisionTreeClassifier # %% diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py index 48f70dab43cf8..81d89d93afe91 100644 --- 
a/examples/model_selection/plot_nested_cross_validation_iris.py +++ b/examples/model_selection/plot_nested_cross_validation_iris.py @@ -44,12 +44,11 @@ """ -import numpy as np -from matplotlib import pyplot as plt - from sklearn.datasets import load_iris -from sklearn.model_selection import GridSearchCV, KFold, cross_val_score +from matplotlib import pyplot as plt from sklearn.svm import SVC +from sklearn.model_selection import GridSearchCV, cross_val_score, KFold +import numpy as np # Number of random trials NUM_TRIALS = 30 diff --git a/examples/model_selection/plot_permutation_tests_for_classification.py b/examples/model_selection/plot_permutation_tests_for_classification.py index f81115c64ec0d..23e3688c437f5 100644 --- a/examples/model_selection/plot_permutation_tests_for_classification.py +++ b/examples/model_selection/plot_permutation_tests_for_classification.py @@ -58,8 +58,9 @@ # the percentage of permutations for which the score obtained is greater # than the score obtained using the original data. -from sklearn.model_selection import StratifiedKFold, permutation_test_score from sklearn.svm import SVC +from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import permutation_test_score clf = SVC(kernel="linear", random_state=7) cv = StratifiedKFold(2, shuffle=True, random_state=0) diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py index 5e523b0958e42..4d9ebcdc4abe2 100644 --- a/examples/model_selection/plot_precision_recall.py +++ b/examples/model_selection/plot_precision_recall.py @@ -100,7 +100,6 @@ # # We will use a Linear SVC classifier to differentiate two types of irises. import numpy as np - from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split @@ -195,7 +194,8 @@ # %% # The average precision score in multi-label settings # ................................................... -from sklearn.metrics import average_precision_score, precision_recall_curve +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import average_precision_score # For each class precision = dict() @@ -222,12 +222,11 @@ display.plot() _ = display.ax_.set_title("Micro-averaged over all classes") -from itertools import cycle - # %% # Plot Precision-Recall curve for each class and iso-f1 curves # ............................................................ 
import matplotlib.pyplot as plt +from itertools import cycle # setup plot details colors = cycle(["navy", "turquoise", "darkorange", "cornflowerblue", "teal"]) diff --git a/examples/model_selection/plot_randomized_search.py b/examples/model_selection/plot_randomized_search.py index aaf885b3ad63a..d5514a9b1c278 100644 --- a/examples/model_selection/plot_randomized_search.py +++ b/examples/model_selection/plot_randomized_search.py @@ -20,15 +20,15 @@ """ -from time import time - import numpy as np + +from time import time import scipy.stats as stats +from sklearn.utils.fixes import loguniform +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.datasets import load_digits from sklearn.linear_model import SGDClassifier -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV -from sklearn.utils.fixes import loguniform # get some data X, y = load_digits(return_X_y=True, n_class=3) diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 0f2d366c166ab..70657d7d9e068 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -33,16 +33,16 @@ """ -from itertools import cycle - -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt +from itertools import cycle -from sklearn import datasets, svm -from sklearn.metrics import auc, roc_auc_score, roc_curve +from sklearn import svm, datasets +from sklearn.metrics import roc_curve, auc from sklearn.model_selection import train_test_split -from sklearn.multiclass import OneVsRestClassifier from sklearn.preprocessing import label_binarize +from sklearn.multiclass import OneVsRestClassifier +from sklearn.metrics import roc_auc_score # Import some data to play with iris = datasets.load_iris() diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index b87550846672c..791f9167f3333 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -54,7 +54,8 @@ import matplotlib.pyplot as plt from sklearn import svm -from sklearn.metrics import RocCurveDisplay, auc +from sklearn.metrics import auc +from sklearn.metrics import RocCurveDisplay from sklearn.model_selection import StratifiedKFold # Run classifier with cross-validation and plot ROC curves diff --git a/examples/model_selection/plot_successive_halving_heatmap.py b/examples/model_selection/plot_successive_halving_heatmap.py index ba808f5d893bc..c7104f6d7144b 100644 --- a/examples/model_selection/plot_successive_halving_heatmap.py +++ b/examples/model_selection/plot_successive_halving_heatmap.py @@ -14,10 +14,12 @@ import numpy as np import pandas as pd +from sklearn.svm import SVC from sklearn import datasets +from sklearn.model_selection import GridSearchCV from sklearn.experimental import enable_halving_search_cv # noqa -from sklearn.model_selection import GridSearchCV, HalvingGridSearchCV -from sklearn.svm import SVC +from sklearn.model_selection import HalvingGridSearchCV + # %% # We first define the parameter space for an :class:`~sklearn.svm.SVC` diff --git a/examples/model_selection/plot_successive_halving_iterations.py b/examples/model_selection/plot_successive_halving_iterations.py index 31805d308e269..bd2d5635e376e 100644 --- a/examples/model_selection/plot_successive_halving_iterations.py +++ b/examples/model_selection/plot_successive_halving_iterations.py @@ -10,15 +10,16 @@ """ -import matplotlib.pyplot as plt -import numpy as np import pandas 
as pd +from sklearn import datasets +import matplotlib.pyplot as plt from scipy.stats import randint +import numpy as np -from sklearn import datasets -from sklearn.ensemble import RandomForestClassifier from sklearn.experimental import enable_halving_search_cv # noqa from sklearn.model_selection import HalvingRandomSearchCV +from sklearn.ensemble import RandomForestClassifier + # %% # We first define the parameter space and train a diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 2fce0c20688c2..528d3482be15b 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -19,7 +19,6 @@ # Generate sample data # -------------------- import numpy as np - from sklearn import linear_model from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split diff --git a/examples/model_selection/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py index 412946fc9ca8b..ae8450b50cea9 100644 --- a/examples/model_selection/plot_underfitting_overfitting.py +++ b/examples/model_selection/plot_underfitting_overfitting.py @@ -21,13 +21,12 @@ """ -import matplotlib.pyplot as plt import numpy as np - -from sklearn.linear_model import LinearRegression -from sklearn.model_selection import cross_val_score +import matplotlib.pyplot as plt from sklearn.pipeline import Pipeline from sklearn.preprocessing import PolynomialFeatures +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import cross_val_score def true_fun(X): diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index 0b6e65d186d6a..1b3c562594188 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -17,8 +17,8 @@ import numpy as np from sklearn.datasets import load_digits -from sklearn.model_selection import validation_curve from sklearn.svm import SVC +from sklearn.model_selection import validation_curve X, y = load_digits(return_X_y=True) subset_mask = np.isin(y, [1, 2]) # binary classification: 1 vs 2 diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py index 1df4ee3b8346b..e1f9feed43a97 100644 --- a/examples/multioutput/plot_classifier_chain_yeast.py +++ b/examples/multioutput/plot_classifier_chain_yeast.py @@ -36,15 +36,14 @@ # Author: Adam Kleczewski # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.datasets import fetch_openml -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import jaccard_score +from sklearn.multioutput import ClassifierChain from sklearn.model_selection import train_test_split from sklearn.multiclass import OneVsRestClassifier -from sklearn.multioutput import ClassifierChain +from sklearn.metrics import jaccard_score +from sklearn.linear_model import LogisticRegression # Load a multi-label dataset from https://www.openml.org/d/40597 X, Y = fetch_openml("yeast", version=4, return_X_y=True, parser="pandas") diff --git a/examples/neighbors/approximate_nearest_neighbors.py b/examples/neighbors/approximate_nearest_neighbors.py index e3968149d8473..479e324cd6aa4 100644 --- a/examples/neighbors/approximate_nearest_neighbors.py +++ 
b/examples/neighbors/approximate_nearest_neighbors.py @@ -43,12 +43,11 @@ """ -import sys - # Author: Tom Dupre la Tour # # License: BSD 3 clause import time +import sys try: import annoy @@ -62,18 +61,18 @@ print("The package 'nmslib' is required to run this example.") sys.exit() -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib.ticker import NullFormatter from scipy.sparse import csr_matrix from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.datasets import fetch_openml -from sklearn.manifold import TSNE from sklearn.neighbors import KNeighborsTransformer +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.datasets import fetch_openml from sklearn.pipeline import make_pipeline +from sklearn.manifold import TSNE from sklearn.utils import shuffle -from sklearn.utils._testing import assert_array_almost_equal class NMSlibTransformer(TransformerMixin, BaseEstimator): diff --git a/examples/neighbors/plot_caching_nearest_neighbors.py b/examples/neighbors/plot_caching_nearest_neighbors.py index 10c0d315da7af..00be6470c1591 100644 --- a/examples/neighbors/plot_caching_nearest_neighbors.py +++ b/examples/neighbors/plot_caching_nearest_neighbors.py @@ -22,12 +22,11 @@ # # License: BSD 3 clause from tempfile import TemporaryDirectory - import matplotlib.pyplot as plt -from sklearn.datasets import load_digits +from sklearn.neighbors import KNeighborsTransformer, KNeighborsClassifier from sklearn.model_selection import GridSearchCV -from sklearn.neighbors import KNeighborsClassifier, KNeighborsTransformer +from sklearn.datasets import load_digits from sklearn.pipeline import Pipeline X, y = load_digits(return_X_y=True) diff --git a/examples/neighbors/plot_classification.py b/examples/neighbors/plot_classification.py index 4ed23862ae455..cc4f0864ba926 100644 --- a/examples/neighbors/plot_classification.py +++ b/examples/neighbors/plot_classification.py @@ -11,8 +11,7 @@ import matplotlib.pyplot as plt import seaborn as sns from matplotlib.colors import ListedColormap - -from sklearn import datasets, neighbors +from sklearn import neighbors, datasets from sklearn.inspection import DecisionBoundaryDisplay n_neighbors = 15 diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py index 045058eab09cc..e580f9fa178bc 100644 --- a/examples/neighbors/plot_digits_kde_sampling.py +++ b/examples/neighbors/plot_digits_kde_sampling.py @@ -11,13 +11,13 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.datasets import load_digits +from sklearn.neighbors import KernelDensity from sklearn.decomposition import PCA from sklearn.model_selection import GridSearchCV -from sklearn.neighbors import KernelDensity # load the data digits = load_digits() diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py index 7c9b6d618b57a..8b139d4cc2335 100644 --- a/examples/neighbors/plot_kde_1d.py +++ b/examples/neighbors/plot_kde_1d.py @@ -28,13 +28,11 @@ """ -import matplotlib.pyplot as plt - # Author: Jake Vanderplas # import numpy as np +import matplotlib.pyplot as plt from scipy.stats import norm - from sklearn.neighbors import KernelDensity # ---------------------------------------------------------------------- diff --git a/examples/neighbors/plot_lof_novelty_detection.py b/examples/neighbors/plot_lof_novelty_detection.py index 91e40661c6dfe..277134cc77673 100644 --- 
a/examples/neighbors/plot_lof_novelty_detection.py +++ b/examples/neighbors/plot_lof_novelty_detection.py @@ -25,10 +25,9 @@ """ +import numpy as np import matplotlib import matplotlib.pyplot as plt -import numpy as np - from sklearn.neighbors import LocalOutlierFactor np.random.seed(42) diff --git a/examples/neighbors/plot_lof_outlier_detection.py b/examples/neighbors/plot_lof_outlier_detection.py index a01a06eededb7..1512173965889 100644 --- a/examples/neighbors/plot_lof_outlier_detection.py +++ b/examples/neighbors/plot_lof_outlier_detection.py @@ -24,9 +24,8 @@ """ -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.neighbors import LocalOutlierFactor np.random.seed(42) diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py index 5dcca91fd94ed..17e6a667fcb3b 100644 --- a/examples/neighbors/plot_nca_classification.py +++ b/examples/neighbors/plot_nca_classification.py @@ -19,13 +19,13 @@ import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap - from sklearn import datasets -from sklearn.inspection import DecisionBoundaryDisplay from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler +from sklearn.inspection import DecisionBoundaryDisplay + n_neighbors = 1 diff --git a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py index 82fd35616929e..d245e0223ccfa 100644 --- a/examples/neighbors/plot_nca_dim_reduction.py +++ b/examples/neighbors/plot_nca_dim_reduction.py @@ -30,13 +30,12 @@ # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn import datasets +from sklearn.model_selection import train_test_split from sklearn.decomposition import PCA from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py index e5fd2f9cb67bd..d722ffa5be033 100644 --- a/examples/neighbors/plot_nca_illustration.py +++ b/examples/neighbors/plot_nca_illustration.py @@ -12,13 +12,12 @@ # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np -from matplotlib import cm -from scipy.special import logsumexp - +import matplotlib.pyplot as plt from sklearn.datasets import make_classification from sklearn.neighbors import NeighborhoodComponentsAnalysis +from matplotlib import cm +from scipy.special import logsumexp # %% # Original points diff --git a/examples/neighbors/plot_nearest_centroid.py b/examples/neighbors/plot_nearest_centroid.py index 5d299b5353e9d..0ea3c0c6b1209 100644 --- a/examples/neighbors/plot_nearest_centroid.py +++ b/examples/neighbors/plot_nearest_centroid.py @@ -8,13 +8,12 @@ """ -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap - from sklearn import datasets -from sklearn.inspection import DecisionBoundaryDisplay from sklearn.neighbors import NearestCentroid +from sklearn.inspection import DecisionBoundaryDisplay n_neighbors = 15 
diff --git a/examples/neighbors/plot_regression.py b/examples/neighbors/plot_regression.py index 1e52af3a5c743..78b850d1a4e2c 100644 --- a/examples/neighbors/plot_regression.py +++ b/examples/neighbors/plot_regression.py @@ -15,13 +15,11 @@ # License: BSD 3 clause (C) INRIA -import matplotlib.pyplot as plt - # %% # Generate sample data # -------------------- import numpy as np - +import matplotlib.pyplot as plt from sklearn import neighbors np.random.seed(0) diff --git a/examples/neighbors/plot_species_kde.py b/examples/neighbors/plot_species_kde.py index 6007ebd58cb7b..c409d354ec986 100644 --- a/examples/neighbors/plot_species_kde.py +++ b/examples/neighbors/plot_species_kde.py @@ -40,9 +40,8 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.datasets import fetch_species_distributions from sklearn.neighbors import KernelDensity diff --git a/examples/neural_networks/plot_mlp_alpha.py b/examples/neural_networks/plot_mlp_alpha.py index b53beef54c115..443d41f4707bf 100644 --- a/examples/neural_networks/plot_mlp_alpha.py +++ b/examples/neural_networks/plot_mlp_alpha.py @@ -23,12 +23,11 @@ import numpy as np from matplotlib import pyplot as plt from matplotlib.colors import ListedColormap - -from sklearn.datasets import make_circles, make_classification, make_moons from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.datasets import make_moons, make_circles, make_classification from sklearn.neural_network import MLPClassifier from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler h = 0.02 # step size in the mesh diff --git a/examples/neural_networks/plot_mlp_training_curves.py b/examples/neural_networks/plot_mlp_training_curves.py index a9f03c2599a8e..3fbddda879162 100644 --- a/examples/neural_networks/plot_mlp_training_curves.py +++ b/examples/neural_networks/plot_mlp_training_curves.py @@ -18,10 +18,10 @@ import matplotlib.pyplot as plt -from sklearn import datasets -from sklearn.exceptions import ConvergenceWarning from sklearn.neural_network import MLPClassifier from sklearn.preprocessing import MinMaxScaler +from sklearn import datasets +from sklearn.exceptions import ConvergenceWarning # different learning rate schedules and momentum parameters params = [ diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py index 43e6a171fb696..03f615786e830 100644 --- a/examples/neural_networks/plot_mnist_filters.py +++ b/examples/neural_networks/plot_mnist_filters.py @@ -25,13 +25,11 @@ """ import warnings - import matplotlib.pyplot as plt - from sklearn.datasets import fetch_openml from sklearn.exceptions import ConvergenceWarning -from sklearn.model_selection import train_test_split from sklearn.neural_network import MLPClassifier +from sklearn.model_selection import train_test_split # Load data from https://www.openml.org/d/554 X, y = fetch_openml( diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py index 3ba878d4ad191..de939922d9514 100644 --- a/examples/neural_networks/plot_rbm_logistic_classification.py +++ b/examples/neural_networks/plot_rbm_logistic_classification.py @@ -23,12 +23,14 @@ # linear shifts of 1 pixel in each direction. 
import numpy as np + from scipy.ndimage import convolve from sklearn import datasets -from sklearn.model_selection import train_test_split from sklearn.preprocessing import minmax_scale +from sklearn.model_selection import train_test_split + def nudge_dataset(X, Y): """ diff --git a/examples/preprocessing/plot_all_scaling.py b/examples/preprocessing/plot_all_scaling.py index b02d98a0a4e77..49af744011d12 100644 --- a/examples/preprocessing/plot_all_scaling.py +++ b/examples/preprocessing/plot_all_scaling.py @@ -46,22 +46,22 @@ # Thomas Unterthiner # License: BSD 3 clause -import matplotlib as mpl import numpy as np -from matplotlib import cm + +import matplotlib as mpl from matplotlib import pyplot as plt +from matplotlib import cm + +from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import minmax_scale +from sklearn.preprocessing import MaxAbsScaler +from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import RobustScaler +from sklearn.preprocessing import Normalizer +from sklearn.preprocessing import QuantileTransformer +from sklearn.preprocessing import PowerTransformer from sklearn.datasets import fetch_california_housing -from sklearn.preprocessing import ( - MaxAbsScaler, - MinMaxScaler, - Normalizer, - PowerTransformer, - QuantileTransformer, - RobustScaler, - StandardScaler, - minmax_scale, -) dataset = fetch_california_housing() X_full, y_full = dataset.data, dataset.target diff --git a/examples/preprocessing/plot_discretization.py b/examples/preprocessing/plot_discretization.py index 3806eb26fd68b..d064ea705903b 100644 --- a/examples/preprocessing/plot_discretization.py +++ b/examples/preprocessing/plot_discretization.py @@ -32,8 +32,8 @@ # Hanmin Qin # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression from sklearn.preprocessing import KBinsDiscretizer diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py index acff3f6b2089d..ff3d2973caff3 100644 --- a/examples/preprocessing/plot_discretization_classification.py +++ b/examples/preprocessing/plot_discretization_classification.py @@ -34,19 +34,20 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap - -from sklearn.datasets import make_circles, make_classification, make_moons -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.exceptions import ConvergenceWarning +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.datasets import make_moons, make_circles, make_classification from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.model_selection import GridSearchCV from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import KBinsDiscretizer, StandardScaler +from sklearn.preprocessing import KBinsDiscretizer from sklearn.svm import SVC, LinearSVC +from sklearn.ensemble import GradientBoostingClassifier from sklearn.utils._testing import ignore_warnings +from sklearn.exceptions import ConvergenceWarning h = 0.02 # step size in the mesh diff --git a/examples/preprocessing/plot_discretization_strategies.py b/examples/preprocessing/plot_discretization_strategies.py index 47342df2f221e..d6b9cb16ee53c 100644 --- 
a/examples/preprocessing/plot_discretization_strategies.py +++ b/examples/preprocessing/plot_discretization_strategies.py @@ -20,11 +20,11 @@ # Author: Tom Dupré la Tour # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn.datasets import make_blobs from sklearn.preprocessing import KBinsDiscretizer +from sklearn.datasets import make_blobs strategies = ["uniform", "quantile", "kmeans"] diff --git a/examples/preprocessing/plot_map_data_to_normal.py b/examples/preprocessing/plot_map_data_to_normal.py index a521039098871..42a61d84fa384 100644 --- a/examples/preprocessing/plot_map_data_to_normal.py +++ b/examples/preprocessing/plot_map_data_to_normal.py @@ -38,11 +38,13 @@ # Nicolas Hug # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt +from sklearn.preprocessing import PowerTransformer +from sklearn.preprocessing import QuantileTransformer from sklearn.model_selection import train_test_split -from sklearn.preprocessing import PowerTransformer, QuantileTransformer + N_SAMPLES = 1000 FONT_SIZE = 6 diff --git a/examples/preprocessing/plot_scaling_importance.py b/examples/preprocessing/plot_scaling_importance.py index fba668b249084..8ba1263b07d10 100644 --- a/examples/preprocessing/plot_scaling_importance.py +++ b/examples/preprocessing/plot_scaling_importance.py @@ -43,13 +43,13 @@ """ import matplotlib.pyplot as plt -from sklearn.datasets import load_wine -from sklearn.decomposition import PCA -from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA from sklearn.naive_bayes import GaussianNB +from sklearn.metrics import accuracy_score +from sklearn.datasets import load_wine from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler # Code source: Tyler Lanigan # Sebastian Raschka diff --git a/examples/release_highlights/plot_release_highlights_0_22_0.py b/examples/release_highlights/plot_release_highlights_0_22_0.py index 3d6846a32f97f..57f1527b7fddb 100644 --- a/examples/release_highlights/plot_release_highlights_0_22_0.py +++ b/examples/release_highlights/plot_release_highlights_0_22_0.py @@ -34,13 +34,12 @@ # :class:`~metrics.plot_confusion_matrix`. Read more about this new API in the # :ref:`User Guide `. -import matplotlib.pyplot as plt - -from sklearn.datasets import make_classification -from sklearn.ensemble import RandomForestClassifier -from sklearn.metrics import plot_roc_curve from sklearn.model_selection import train_test_split from sklearn.svm import SVC +from sklearn.metrics import plot_roc_curve +from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import make_classification +import matplotlib.pyplot as plt X, y = make_classification(random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) @@ -74,12 +73,12 @@ # Read more in the :ref:`User Guide `. 
from sklearn.datasets import load_iris -from sklearn.ensemble import StackingClassifier +from sklearn.svm import LinearSVC from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler -from sklearn.svm import LinearSVC +from sklearn.pipeline import make_pipeline +from sklearn.ensemble import StackingClassifier +from sklearn.model_selection import train_test_split X, y = load_iris(return_X_y=True) estimators = [ @@ -97,9 +96,8 @@ # The :func:`inspection.permutation_importance` can be used to get an # estimate of the importance of each feature, for any fitted estimator: -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier from sklearn.inspection import permutation_importance @@ -151,9 +149,8 @@ # See more details in the :ref:`User Guide `. from tempfile import TemporaryDirectory - -from sklearn.manifold import Isomap from sklearn.neighbors import KNeighborsTransformer +from sklearn.manifold import Isomap from sklearn.pipeline import make_pipeline X, y = make_classification(random_state=0) @@ -269,8 +266,8 @@ def test_sklearn_compatible_estimator(estimator, check): from sklearn.datasets import make_classification -from sklearn.metrics import roc_auc_score from sklearn.svm import SVC +from sklearn.metrics import roc_auc_score X, y = make_classification(n_classes=4, n_informative=16) clf = SVC(decision_function_shape="ovo", probability=True).fit(X, y) diff --git a/examples/release_highlights/plot_release_highlights_0_23_0.py b/examples/release_highlights/plot_release_highlights_0_23_0.py index 8b298b0605272..4d6a914e01fb3 100644 --- a/examples/release_highlights/plot_release_highlights_0_23_0.py +++ b/examples/release_highlights/plot_release_highlights_0_23_0.py @@ -35,10 +35,9 @@ # 'poisson' loss as well. import numpy as np - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.linear_model import PoissonRegressor from sklearn.model_selection import train_test_split +from sklearn.linear_model import PoissonRegressor +from sklearn.ensemble import HistGradientBoostingRegressor n_samples, n_features = 1000, 20 rng = np.random.RandomState(0) @@ -64,11 +63,11 @@ # this feature. from sklearn import set_config -from sklearn.compose import make_column_transformer -from sklearn.impute import SimpleImputer -from sklearn.linear_model import LogisticRegression from sklearn.pipeline import make_pipeline from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.impute import SimpleImputer +from sklearn.compose import make_column_transformer +from sklearn.linear_model import LogisticRegression set_config(display="diagram") @@ -86,8 +85,6 @@ clf = make_pipeline(preprocessor, LogisticRegression()) clf -import numpy as np - ############################################################################## # Scalability and stability improvements to KMeans # ------------------------------------------------ @@ -98,11 +95,11 @@ # effect anymore. For more details on how to control the number of threads, # please refer to our :ref:`parallelism` notes. 
import scipy - +import numpy as np +from sklearn.model_selection import train_test_split from sklearn.cluster import KMeans from sklearn.datasets import make_blobs from sklearn.metrics import completeness_score -from sklearn.model_selection import train_test_split rng = np.random.RandomState(0) X, y = make_blobs(random_state=rng) @@ -128,10 +125,9 @@ # effect of the first feature, instead of fitting the noise. import numpy as np from matplotlib import pyplot as plt - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.inspection import plot_partial_dependence from sklearn.model_selection import train_test_split +from sklearn.inspection import plot_partial_dependence +from sklearn.ensemble import HistGradientBoostingRegressor n_samples = 500 rng = np.random.RandomState(0) @@ -170,11 +166,10 @@ # The two linear regressors :class:`~sklearn.linear_model.Lasso` and # :class:`~sklearn.linear_model.ElasticNet` now support sample weights. -import numpy as np - +from sklearn.model_selection import train_test_split from sklearn.datasets import make_regression from sklearn.linear_model import Lasso -from sklearn.model_selection import train_test_split +import numpy as np n_samples, n_features = 1000, 20 rng = np.random.RandomState(0) diff --git a/examples/release_highlights/plot_release_highlights_0_24_0.py b/examples/release_highlights/plot_release_highlights_0_24_0.py index 888e6e5af9463..7e044db11ccd3 100644 --- a/examples/release_highlights/plot_release_highlights_0_24_0.py +++ b/examples/release_highlights/plot_release_highlights_0_24_0.py @@ -51,11 +51,10 @@ import numpy as np from scipy.stats import randint - -from sklearn.datasets import make_classification -from sklearn.ensemble import RandomForestClassifier from sklearn.experimental import enable_halving_search_cv # noqa from sklearn.model_selection import HalvingRandomSearchCV +from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import make_classification rng = np.random.RandomState(0) @@ -119,7 +118,6 @@ # Read more in the :ref:`User guide `. import numpy as np - from sklearn import datasets from sklearn.semi_supervised import SelfTrainingClassifier from sklearn.svm import SVC @@ -142,9 +140,9 @@ # (backward selection), based on a cross-validated score maximization. # See the :ref:`User Guide `. -from sklearn.datasets import load_iris from sklearn.feature_selection import SequentialFeatureSelector from sklearn.neighbors import KNeighborsClassifier +from sklearn.datasets import load_iris X, y = load_iris(return_X_y=True, as_frame=True) feature_names = X.columns @@ -165,11 +163,11 @@ # :class:`~sklearn.preprocessing.PolynomialFeatures`. from sklearn.datasets import fetch_covtype -from sklearn.kernel_approximation import PolynomialCountSketch -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline +from sklearn.model_selection import train_test_split from sklearn.preprocessing import MinMaxScaler +from sklearn.kernel_approximation import PolynomialCountSketch +from sklearn.linear_model import LogisticRegression X, y = fetch_covtype(return_X_y=True) pipe = make_pipeline( @@ -196,8 +194,8 @@ # prediction on a feature for each sample separately, with one line per sample. 
# See the :ref:`User Guide ` -from sklearn.datasets import fetch_california_housing from sklearn.ensemble import RandomForestRegressor +from sklearn.datasets import fetch_california_housing from sklearn.inspection import plot_partial_dependence X, y = fetch_california_housing(return_X_y=True, as_frame=True) @@ -228,10 +226,9 @@ # splitting criterion. Setting `criterion="poisson"` might be a good choice # if your target is a count or a frequency. -import numpy as np - -from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeRegressor +from sklearn.model_selection import train_test_split +import numpy as np n_samples, n_features = 1000, 20 rng = np.random.RandomState(0) diff --git a/examples/release_highlights/plot_release_highlights_1_0_0.py b/examples/release_highlights/plot_release_highlights_1_0_0.py index 987d02c840e48..079d87a5d4f51 100644 --- a/examples/release_highlights/plot_release_highlights_1_0_0.py +++ b/examples/release_highlights/plot_release_highlights_1_0_0.py @@ -89,7 +89,6 @@ # refer to the :ref:`User Guide `. import numpy as np - from sklearn.preprocessing import SplineTransformer X = np.arange(5).reshape(5, 1) @@ -135,8 +134,6 @@ # :align: center # :scale: 50% -import pandas as pd - ############################################################################## # Feature Names Support # -------------------------------------------------------------------------- @@ -149,13 +146,12 @@ # non-:term:`fit`, such as :term:`predict`, are consistent with features in # :term:`fit`: from sklearn.preprocessing import StandardScaler +import pandas as pd X = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) scalar = StandardScaler().fit(X) scalar.feature_names_in_ -import pandas as pd - # %% # The support of :term:`get_feature_names_out` is available for transformers # that already had :term:`get_feature_names` and transformers with a one-to-one @@ -166,6 +162,7 @@ # combine feature names of its transformers: from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder +import pandas as pd X = pd.DataFrame({"pet": ["dog", "cat", "fish"], "age": [3, 7, 1]}) preprocessor = ColumnTransformer( diff --git a/examples/release_highlights/plot_release_highlights_1_1_0.py b/examples/release_highlights/plot_release_highlights_1_1_0.py index 99b669930858e..7021cd2bbd821 100644 --- a/examples/release_highlights/plot_release_highlights_1_1_0.py +++ b/examples/release_highlights/plot_release_highlights_1_1_0.py @@ -21,15 +21,14 @@ """ -import matplotlib.pyplot as plt -import numpy as np - # %% # Quantile loss in :class:`ensemble.HistGradientBoostingRegressor` # ---------------------------------------------------------------- # :class:`ensemble.HistGradientBoostingRegressor` can model quantiles with # `loss="quantile"` and the new parameter `quantile`. 
from sklearn.ensemble import HistGradientBoostingRegressor +import numpy as np +import matplotlib.pyplot as plt # Simple regression function for X * cos(X) rng = np.random.RandomState(42) @@ -60,12 +59,12 @@ # :class:`pipeline.Pipeline` to construct the output feature names for more complex # pipelines: from sklearn.compose import ColumnTransformer -from sklearn.datasets import fetch_openml -from sklearn.feature_selection import SelectKBest +from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.pipeline import make_pipeline from sklearn.impute import SimpleImputer +from sklearn.feature_selection import SelectKBest +from sklearn.datasets import fetch_openml from sklearn.linear_model import LogisticRegression -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OneHotEncoder, StandardScaler X, y = fetch_openml( "titanic", version=1, as_frame=True, return_X_y=True, parser="pandas" @@ -101,8 +100,6 @@ plt.tight_layout() -import numpy as np - # %% # Grouping infrequent categories in :class:`OneHotEncoder` # -------------------------------------------------------- @@ -111,6 +108,7 @@ # categories are `min_frequency` and `max_categories`. See the # :ref:`User Guide ` for more details. from sklearn.preprocessing import OneHotEncoder +import numpy as np X = np.array( [["dog"] * 5 + ["cat"] * 20 + ["rabbit"] * 10 + ["snake"] * 3], dtype=object @@ -177,7 +175,6 @@ # online learning when the data is not readily available from the start, or when the # data does not fit into memory. import numpy as np - from sklearn.decomposition import MiniBatchNMF rng = np.random.RandomState(0) @@ -200,10 +197,6 @@ f"{np.sum((X - X_reconstructed) ** 2) / np.sum(X**2):.5f}", ) -import matplotlib.pyplot as plt - -from sklearn.cluster import BisectingKMeans, KMeans - # %% # BisectingKMeans: divide and cluster # ----------------------------------- @@ -213,6 +206,8 @@ # new clusters repeatedly until the target number of clusters is reached, giving a # hierarchical structure to the clustering. from sklearn.datasets import make_blobs +from sklearn.cluster import KMeans, BisectingKMeans +import matplotlib.pyplot as plt X, _ = make_blobs(n_samples=1000, centers=2, random_state=0) diff --git a/examples/semi_supervised/plot_label_propagation_digits.py b/examples/semi_supervised/plot_label_propagation_digits.py index dc4ed674a21bc..f848e3b76e084 100644 --- a/examples/semi_supervised/plot_label_propagation_digits.py +++ b/examples/semi_supervised/plot_label_propagation_digits.py @@ -19,14 +19,13 @@ class will be very good. # Authors: Clay Woolam # License: BSD -import numpy as np - # %% # Data generation # --------------- # # We use the digits dataset. We only use a subset of randomly selected samples. from sklearn import datasets +import numpy as np digits = datasets.load_digits() rng = np.random.RandomState(2) @@ -54,8 +53,6 @@ class will be very good. y_train = np.copy(y) y_train[unlabeled_set] = -1 -from sklearn.metrics import classification_report - # %% # Semi-supervised learning # ------------------------ @@ -63,6 +60,7 @@ class will be very good. # We fit a :class:`~sklearn.semi_supervised.LabelSpreading` and use it to predict # the unknown labels. 
from sklearn.semi_supervised import LabelSpreading +from sklearn.metrics import classification_report lp_model = LabelSpreading(gamma=0.25, max_iter=20) lp_model.fit(X, y_train) diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py index 9a5facc15bd71..7d4a348cad9b6 100644 --- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py +++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py @@ -23,13 +23,13 @@ # Authors: Clay Woolam # License: BSD -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from scipy import stats from sklearn import datasets -from sklearn.metrics import classification_report, confusion_matrix from sklearn.semi_supervised import LabelSpreading +from sklearn.metrics import classification_report, confusion_matrix digits = datasets.load_digits() rng = np.random.RandomState(0) diff --git a/examples/semi_supervised/plot_label_propagation_structure.py b/examples/semi_supervised/plot_label_propagation_structure.py index de5a72ddf33fa..97e8e19c2b3b3 100644 --- a/examples/semi_supervised/plot_label_propagation_structure.py +++ b/examples/semi_supervised/plot_label_propagation_structure.py @@ -22,7 +22,6 @@ # Here, all labels but two are tagged as unknown. import numpy as np - from sklearn.datasets import make_circles n_samples = 200 diff --git a/examples/semi_supervised/plot_self_training_varying_threshold.py b/examples/semi_supervised/plot_self_training_varying_threshold.py index 2c7a485d06eb0..801e48b8411f5 100644 --- a/examples/semi_supervised/plot_self_training_varying_threshold.py +++ b/examples/semi_supervised/plot_self_training_varying_threshold.py @@ -32,14 +32,13 @@ # Authors: Oliver Rausch # License: BSD -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn import datasets -from sklearn.metrics import accuracy_score +from sklearn.svm import SVC from sklearn.model_selection import StratifiedKFold from sklearn.semi_supervised import SelfTrainingClassifier -from sklearn.svm import SVC +from sklearn.metrics import accuracy_score from sklearn.utils import shuffle n_splits = 3 diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py index 08f8e477d1971..a2ffe53ca2182 100644 --- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py +++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py @@ -15,13 +15,15 @@ import numpy as np from sklearn.datasets import fetch_20newsgroups -from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.feature_extraction.text import TfidfTransformer +from sklearn.preprocessing import FunctionTransformer from sklearn.linear_model import SGDClassifier -from sklearn.metrics import f1_score from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline -from sklearn.preprocessing import FunctionTransformer -from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier +from sklearn.semi_supervised import SelfTrainingClassifier +from sklearn.semi_supervised import LabelSpreading +from sklearn.metrics import f1_score # Loading dataset containing first five categories data = fetch_20newsgroups( diff --git a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py 
b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py index 766f7ea0a79c6..402cd41d6a0f2 100644 --- a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py +++ b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py @@ -18,12 +18,13 @@ # Oliver Rausch # License: BSD -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn import datasets -from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier from sklearn.svm import SVC +from sklearn.semi_supervised import LabelSpreading +from sklearn.semi_supervised import SelfTrainingClassifier + iris = datasets.load_iris() diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py index cacd67ed056ac..c2c3bc6e6ba28 100644 --- a/examples/svm/plot_custom_kernel.py +++ b/examples/svm/plot_custom_kernel.py @@ -8,10 +8,9 @@ """ -import matplotlib.pyplot as plt import numpy as np - -from sklearn import datasets, svm +import matplotlib.pyplot as plt +from sklearn import svm, datasets from sklearn.inspection import DecisionBoundaryDisplay # import some data to play with diff --git a/examples/svm/plot_iris_svc.py b/examples/svm/plot_iris_svc.py index d13a9fe49c803..5931ad57c263f 100644 --- a/examples/svm/plot_iris_svc.py +++ b/examples/svm/plot_iris_svc.py @@ -35,10 +35,10 @@ """ import matplotlib.pyplot as plt - -from sklearn import datasets, svm +from sklearn import svm, datasets from sklearn.inspection import DecisionBoundaryDisplay + # import some data to play with iris = datasets.load_iris() # Take the first two features. We could avoid this by using a two-dim dataset diff --git a/examples/svm/plot_linearsvc_support_vectors.py b/examples/svm/plot_linearsvc_support_vectors.py index 7f82b6c8bb0fe..7fdfea416013f 100644 --- a/examples/svm/plot_linearsvc_support_vectors.py +++ b/examples/svm/plot_linearsvc_support_vectors.py @@ -9,12 +9,11 @@ """ -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.datasets import make_blobs -from sklearn.inspection import DecisionBoundaryDisplay from sklearn.svm import LinearSVC +from sklearn.inspection import DecisionBoundaryDisplay X, y = make_blobs(n_samples=40, centers=2, random_state=0) diff --git a/examples/svm/plot_oneclass.py b/examples/svm/plot_oneclass.py index d4348fa0ec435..082cbcd6de2be 100644 --- a/examples/svm/plot_oneclass.py +++ b/examples/svm/plot_oneclass.py @@ -11,10 +11,9 @@ """ -import matplotlib.font_manager -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt +import matplotlib.font_manager from sklearn import svm xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500)) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index ba0154b477b46..fa4310134487a 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -135,8 +135,9 @@ def __call__(self, value, clip=None): # 10 is often helpful. Using a basis of 2, a finer # tuning can be achieved but at a much higher cost. 
-from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit from sklearn.svm import SVC +from sklearn.model_selection import StratifiedShuffleSplit +from sklearn.model_selection import GridSearchCV C_range = np.logspace(-2, 10, 13) gamma_range = np.logspace(-9, 3, 13) diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index 23f464169f516..45bacff6a2b97 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -10,11 +10,11 @@ """ import matplotlib.pyplot as plt - from sklearn import svm from sklearn.datasets import make_blobs from sklearn.inspection import DecisionBoundaryDisplay + # we create 40 separable points X, y = make_blobs(n_samples=40, centers=2, random_state=6) diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index 6fd7de98f3fb6..fe71420ffd0b3 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -26,7 +26,6 @@ """ import matplotlib.pyplot as plt - from sklearn import svm from sklearn.datasets import make_blobs from sklearn.inspection import DecisionBoundaryDisplay diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index d26f0eed18fa0..730d6a35f35a8 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -14,7 +14,6 @@ # Load some data to play with # --------------------------- import numpy as np - from sklearn.datasets import load_iris X, y = load_iris(return_X_y=True) @@ -23,12 +22,11 @@ rng = np.random.RandomState(0) X = np.hstack((X, 2 * rng.random((X.shape[0], 36)))) -from sklearn.feature_selection import SelectPercentile, chi2 - # %% # Create the pipeline # ------------------- from sklearn.pipeline import Pipeline +from sklearn.feature_selection import SelectPercentile, chi2 from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC @@ -47,7 +45,6 @@ # Plot the cross-validation score as a function of percentile of features # ----------------------------------------------------------------------- import matplotlib.pyplot as plt - from sklearn.model_selection import cross_val_score score_means = list() diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py index 70c1f13daad3e..dd7eb43e15231 100644 --- a/examples/svm/plot_svm_kernels.py +++ b/examples/svm/plot_svm_kernels.py @@ -14,11 +14,11 @@ # Code source: Gaël Varoquaux # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn import svm + # Our dataset and targets X = np.c_[ (0.4, -0.7), diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py index c36fcfdd05fce..9f52881f1faf2 100644 --- a/examples/svm/plot_svm_margin.py +++ b/examples/svm/plot_svm_margin.py @@ -18,10 +18,9 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt from matplotlib import cm - from sklearn import svm # we create 40 separable points diff --git a/examples/svm/plot_svm_nonlinear.py b/examples/svm/plot_svm_nonlinear.py index 4990e509661a1..f88231b4b6af4 100644 --- a/examples/svm/plot_svm_nonlinear.py +++ b/examples/svm/plot_svm_nonlinear.py @@ -11,9 +11,8 @@ """ -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn import svm xx, yy = 
np.meshgrid(np.linspace(-3, 3, 500), np.linspace(-3, 3, 500)) diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py index ab34528a37af6..75a16b571c3ea 100644 --- a/examples/svm/plot_svm_regression.py +++ b/examples/svm/plot_svm_regression.py @@ -7,10 +7,9 @@ """ -import matplotlib.pyplot as plt import numpy as np - from sklearn.svm import SVR +import matplotlib.pyplot as plt # %% # Generate sample data diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py index 1bd15bb64d683..b7e367e45d531 100644 --- a/examples/svm/plot_svm_scale_c.py +++ b/examples/svm/plot_svm_scale_c.py @@ -82,13 +82,14 @@ # Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np +import matplotlib.pyplot as plt -from sklearn import datasets -from sklearn.model_selection import GridSearchCV, ShuffleSplit from sklearn.svm import LinearSVC +from sklearn.model_selection import ShuffleSplit +from sklearn.model_selection import GridSearchCV from sklearn.utils import check_random_state +from sklearn import datasets rnd = check_random_state(1) diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py index be1749c0a4535..e12460b494c02 100644 --- a/examples/svm/plot_svm_tie_breaking.py +++ b/examples/svm/plot_svm_tie_breaking.py @@ -17,11 +17,10 @@ # Code source: Andreas Mueller, Adrin Jalali # License: BSD 3 clause -import matplotlib.pyplot as plt import numpy as np - -from sklearn.datasets import make_blobs +import matplotlib.pyplot as plt from sklearn.svm import SVC +from sklearn.datasets import make_blobs X, y = make_blobs(random_state=27) diff --git a/examples/svm/plot_weighted_samples.py b/examples/svm/plot_weighted_samples.py index c17742e091390..f346599300aba 100644 --- a/examples/svm/plot_weighted_samples.py +++ b/examples/svm/plot_weighted_samples.py @@ -14,9 +14,8 @@ """ -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn import svm diff --git a/examples/text/plot_document_classification_20newsgroups.py b/examples/text/plot_document_classification_20newsgroups.py index a13fe259622cb..7f24861a0e9ce 100644 --- a/examples/text/plot_document_classification_20newsgroups.py +++ b/examples/text/plot_document_classification_20newsgroups.py @@ -87,7 +87,8 @@ def size_mb(docs): # Extracting features from the training data using a sparse vectorizer from time import time -from sklearn.feature_extraction.text import HashingVectorizer, TfidfVectorizer +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.feature_extraction.text import HashingVectorizer t0 = time() @@ -141,7 +142,6 @@ def size_mb(docs): # # First we define small benchmarking utilities import numpy as np - from sklearn import metrics from sklearn.utils.extmath import density @@ -190,22 +190,21 @@ def benchmark(clf): return clf_descr, score, train_time, test_time -from sklearn.ensemble import RandomForestClassifier - # %% # We now train and test the datasets with 15 different classification # models and get performance results for each model. 
from sklearn.feature_selection import SelectFromModel -from sklearn.linear_model import ( - PassiveAggressiveClassifier, - Perceptron, - RidgeClassifier, - SGDClassifier, -) -from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB -from sklearn.neighbors import KNeighborsClassifier, NearestCentroid +from sklearn.linear_model import RidgeClassifier from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC +from sklearn.linear_model import SGDClassifier +from sklearn.linear_model import Perceptron +from sklearn.linear_model import PassiveAggressiveClassifier +from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB +from sklearn.neighbors import KNeighborsClassifier +from sklearn.neighbors import NearestCentroid +from sklearn.ensemble import RandomForestClassifier + results = [] for clf, name in ( diff --git a/examples/text/plot_document_clustering.py b/examples/text/plot_document_clustering.py index c207a4b530cd6..24af666330e5c 100644 --- a/examples/text/plot_document_clustering.py +++ b/examples/text/plot_document_clustering.py @@ -53,24 +53,24 @@ # Lars Buitinck # License: BSD 3 clause +from sklearn.datasets import fetch_20newsgroups +from sklearn.decomposition import TruncatedSVD +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.feature_extraction.text import HashingVectorizer +from sklearn.feature_extraction.text import TfidfTransformer +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import Normalizer +from sklearn import metrics + +from sklearn.cluster import KMeans, MiniBatchKMeans + import logging -import sys from optparse import OptionParser +import sys from time import time import numpy as np -from sklearn import metrics -from sklearn.cluster import KMeans, MiniBatchKMeans -from sklearn.datasets import fetch_20newsgroups -from sklearn.decomposition import TruncatedSVD -from sklearn.feature_extraction.text import ( - HashingVectorizer, - TfidfTransformer, - TfidfVectorizer, -) -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import Normalizer # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") diff --git a/examples/text/plot_hashing_vs_dict_vectorizer.py b/examples/text/plot_hashing_vs_dict_vectorizer.py index 92092b2c078b6..ce359cd137487 100644 --- a/examples/text/plot_hashing_vs_dict_vectorizer.py +++ b/examples/text/plot_hashing_vs_dict_vectorizer.py @@ -19,9 +19,9 @@ # Author: Lars Buitinck # License: BSD 3 clause +from collections import defaultdict import re import sys -from collections import defaultdict from time import time import numpy as np diff --git a/examples/tree/plot_cost_complexity_pruning.py b/examples/tree/plot_cost_complexity_pruning.py index b232389ea9ded..d21d163c9a1e3 100644 --- a/examples/tree/plot_cost_complexity_pruning.py +++ b/examples/tree/plot_cost_complexity_pruning.py @@ -18,9 +18,8 @@ """ import matplotlib.pyplot as plt - -from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split +from sklearn.datasets import load_breast_cancer from sklearn.tree import DecisionTreeClassifier # %% diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py index 15cdd36129a73..14f6506b5810f 100644 --- a/examples/tree/plot_iris_dtc.py +++ b/examples/tree/plot_iris_dtc.py @@ -21,15 +21,15 @@ iris = load_iris() -import matplotlib.pyplot as plt - # %% # Display the decision functions of trees trained on all pairs of features. 
import numpy as np +import matplotlib.pyplot as plt from sklearn.datasets import load_iris -from sklearn.inspection import DecisionBoundaryDisplay from sklearn.tree import DecisionTreeClassifier +from sklearn.inspection import DecisionBoundaryDisplay + # Parameters n_classes = 3 diff --git a/examples/tree/plot_tree_regression.py b/examples/tree/plot_tree_regression.py index a819c092dbae1..6ed28a5cbfa99 100644 --- a/examples/tree/plot_tree_regression.py +++ b/examples/tree/plot_tree_regression.py @@ -14,12 +14,10 @@ details of the training data and learn from the noise, i.e. they overfit. """ -import matplotlib.pyplot as plt - # Import the necessary modules and libraries import numpy as np - from sklearn.tree import DecisionTreeRegressor +import matplotlib.pyplot as plt # Create a random dataset rng = np.random.RandomState(1) diff --git a/examples/tree/plot_tree_regression_multioutput.py b/examples/tree/plot_tree_regression_multioutput.py index b6d2800d2732d..a75652a6ddd56 100644 --- a/examples/tree/plot_tree_regression_multioutput.py +++ b/examples/tree/plot_tree_regression_multioutput.py @@ -15,9 +15,8 @@ details of the training data and learn from the noise, i.e. they overfit. """ -import matplotlib.pyplot as plt import numpy as np - +import matplotlib.pyplot as plt from sklearn.tree import DecisionTreeRegressor # Create a random dataset diff --git a/examples/tree/plot_unveil_tree_structure.py b/examples/tree/plot_unveil_tree_structure.py index d4009e3111f7f..6313d0ccbb74f 100644 --- a/examples/tree/plot_unveil_tree_structure.py +++ b/examples/tree/plot_unveil_tree_structure.py @@ -19,10 +19,10 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn import tree -from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split +from sklearn.datasets import load_iris from sklearn.tree import DecisionTreeClassifier +from sklearn import tree ############################################################################## # Train tree classifier diff --git a/maint_tools/check_pxd_in_installation.py b/maint_tools/check_pxd_in_installation.py index 0b1e47ca95aa7..b792912048350 100644 --- a/maint_tools/check_pxd_in_installation.py +++ b/maint_tools/check_pxd_in_installation.py @@ -6,11 +6,12 @@ """ import os -import pathlib -import subprocess import sys +import pathlib import tempfile import textwrap +import subprocess + sklearn_dir = pathlib.Path(sys.argv[1]) pxd_files = list(sklearn_dir.glob("**/*.pxd")) diff --git a/maint_tools/sort_whats_new.py b/maint_tools/sort_whats_new.py index c9820cc984ba5..9a45e31322c05 100755 --- a/maint_tools/sort_whats_new.py +++ b/maint_tools/sort_whats_new.py @@ -2,8 +2,8 @@ # Sorts what's new entries with per-module headings. # Pass what's new entries on stdin. -import re import sys +import re from collections import defaultdict LABEL_ORDER = ["MajorFeature", "Feature", "Enhancement", "Efficiency", "Fix", "API"] diff --git a/maint_tools/update_tracking_issue.py b/maint_tools/update_tracking_issue.py index 7463e2388cdaf..855c733cffb31 100644 --- a/maint_tools/update_tracking_issue.py +++ b/maint_tools/update_tracking_issue.py @@ -11,9 +11,9 @@ github account that does **not** have commit access to the public repo. 
""" -import argparse -import sys from pathlib import Path +import sys +import argparse import defusedxml.ElementTree as ET from github import Github diff --git a/setup.py b/setup.py index fe3a00cda3959..7ad32e95e53a5 100755 --- a/setup.py +++ b/setup.py @@ -4,17 +4,18 @@ # 2010 Fabian Pedregosa # License: 3-clause BSD -import importlib +import sys import os import platform import shutil -import sys -import traceback -from distutils.command.clean import clean as Clean -from distutils.command.sdist import sdist # We need to import setuptools before because it monkey-patches distutils import setuptools # noqa +from distutils.command.clean import clean as Clean +from distutils.command.sdist import sdist + +import traceback +import importlib try: import builtins @@ -52,6 +53,7 @@ import sklearn._min_dependencies as min_deps # noqa from sklearn.externals._packaging.version import parse as parse_version # noqa + VERSION = sklearn.__version__ @@ -163,7 +165,6 @@ def configuration(parent_package="", top_path=None): os.remove("MANIFEST") from numpy.distutils.misc_util import Configuration - from sklearn._build_utils import _check_cython_version config = Configuration(None, parent_package, top_path) @@ -304,7 +305,7 @@ def setup_package(): # These commands require the setup from numpy.distutils because they # may use numpy.distutils compiler classes. - from distutils.ccompiler import CCompiler + from numpy.distutils.core import setup # Monkeypatches CCompiler.spawn to prevent random wheel build errors on Windows # The build errors on Windows was because msvccompiler spawn was not threadsafe @@ -313,8 +314,7 @@ def setup_package(): # https://github.com/scikit-learn/scikit-learn/issues/22310 # https://github.com/numpy/numpy/pull/20640 from numpy.distutils.ccompiler import replace_method - from numpy.distutils.core import setup - + from distutils.ccompiler import CCompiler from sklearn.externals._numpy_compiler_patch import CCompiler_spawn replace_method(CCompiler, "spawn", CCompiler_spawn) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 71e70d08b7924..097501b0c5c6a 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -12,12 +12,13 @@ See http://scikit-learn.org for complete documentation. """ +import sys import logging import os import random -import sys -from ._config import config_context, get_config, set_config + +from ._config import get_config, set_config, config_context logger = logging.getLogger(__name__) @@ -76,8 +77,8 @@ # It is necessary to do this prior to importing show_versions as the # later is linked to the OpenMP runtime to make it possible to introspect # it and importing it first would fail if the OpenMP dll cannot be found. - from . import __check_build # noqa: F401 from . import _distributor_init # noqa: F401 + from . 
import __check_build # noqa: F401 from .base import clone from .utils._show_versions import show_versions diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index 8a9d62050088d..d8206a3a715f8 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -5,15 +5,16 @@ # license: BSD -import contextlib import os -from distutils.version import LooseVersion - import sklearn +import contextlib + +from distutils.version import LooseVersion -from .._min_dependencies import CYTHON_MIN_VERSION -from .openmp_helpers import check_openmp_support from .pre_build_helpers import basic_check_build +from .openmp_helpers import check_openmp_support +from .._min_dependencies import CYTHON_MIN_VERSION + DEFAULT_ROOT = "sklearn" diff --git a/sklearn/_build_utils/openmp_helpers.py b/sklearn/_build_utils/openmp_helpers.py index cd16db9684bbb..192e96cd30765 100644 --- a/sklearn/_build_utils/openmp_helpers.py +++ b/sklearn/_build_utils/openmp_helpers.py @@ -5,10 +5,11 @@ import os -import subprocess import sys import textwrap import warnings +import subprocess + from distutils.errors import CompileError, LinkError from .pre_build_helpers import compile_test_program diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py index 01ab527d4e67a..0a2a942f7991e 100644 --- a/sklearn/_build_utils/pre_build_helpers.py +++ b/sklearn/_build_utils/pre_build_helpers.py @@ -1,17 +1,17 @@ """Helpers to check build environment before actual build of scikit-learn""" -import glob import os -import subprocess import sys +import glob import tempfile import textwrap +import setuptools # noqa +import subprocess import warnings + from distutils.dist import Distribution from distutils.sysconfig import customize_compiler -import setuptools # noqa - # NumPy 1.23 deprecates numpy.distutils with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) diff --git a/sklearn/_config.py b/sklearn/_config.py index 832ead13df655..c865b879dbea3 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -1,8 +1,8 @@ """Global configuration state and functions for management """ import os -import threading from contextlib import contextmanager as contextmanager +import threading _global_config = { "assume_finite": bool(os.environ.get("SKLEARN_ASSUME_FINITE", False)), diff --git a/sklearn/_isotonic.pyx b/sklearn/_isotonic.pyx index 21dd4909d79e4..34b6871f133e8 100644 --- a/sklearn/_isotonic.pyx +++ b/sklearn/_isotonic.pyx @@ -5,9 +5,8 @@ # pool at each step. 
import numpy as np - -cimport cython cimport numpy as cnp +cimport cython from cython cimport floating cnp.import_array() diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py index 7090d1a6a5c83..63ae3038df8ae 100644 --- a/sklearn/_loss/__init__.py +++ b/sklearn/_loss/__init__.py @@ -4,17 +4,18 @@ """ from .loss import ( + HalfSquaredError, AbsoluteError, - HalfBinomialLoss, - HalfGammaLoss, - HalfMultinomialLoss, + PinballLoss, HalfPoissonLoss, - HalfSquaredError, + HalfGammaLoss, HalfTweedieLoss, HalfTweedieLossIdentity, - PinballLoss, + HalfBinomialLoss, + HalfMultinomialLoss, ) + __all__ = [ "HalfSquaredError", "AbsoluteError", diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py index 4542a83bef559..6fbe675fef533 100644 --- a/sklearn/_loss/glm_distribution.py +++ b/sklearn/_loss/glm_distribution.py @@ -9,13 +9,14 @@ # This is only used for backward compatibility in _GeneralizedLinearRegressor # for the deprecated family attribute. -import numbers from abc import ABCMeta, abstractmethod from collections import namedtuple +import numbers import numpy as np from scipy.special import xlogy + DistributionBoundary = namedtuple("DistributionBoundary", ("value", "inclusive")) diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py index 3e7c242724b9d..4cb46a15ef263 100644 --- a/sklearn/_loss/link.py +++ b/sklearn/_loss/link.py @@ -9,7 +9,6 @@ import numpy as np from scipy.special import expit, logit from scipy.stats import gmean - from ..utils.extmath import softmax diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py index d17f55fa6a817..ad52ffd438238 100644 --- a/sklearn/_loss/loss.py +++ b/sklearn/_loss/loss.py @@ -16,25 +16,29 @@ # - Replace link module of GLMs. import numbers - import numpy as np from scipy.special import xlogy - -from ..utils import check_scalar -from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper -from ..utils.stats import _weighted_percentile from ._loss import ( + CyHalfSquaredError, CyAbsoluteError, - CyHalfBinomialLoss, - CyHalfGammaLoss, - CyHalfMultinomialLoss, + CyPinballLoss, CyHalfPoissonLoss, - CyHalfSquaredError, + CyHalfGammaLoss, CyHalfTweedieLoss, CyHalfTweedieLossIdentity, - CyPinballLoss, + CyHalfBinomialLoss, + CyHalfMultinomialLoss, ) -from .link import IdentityLink, Interval, LogitLink, LogLink, MultinomialLogit +from .link import ( + Interval, + IdentityLink, + LogLink, + LogitLink, + MultinomialLogit, +) +from ..utils import check_scalar +from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper +from ..utils.stats import _weighted_percentile # Note: The shape of raw_prediction for multiclass classifications are diff --git a/sklearn/_loss/setup.py b/sklearn/_loss/setup.py index 4be1481df1a7f..2a2d2b5f13b8a 100644 --- a/sklearn/_loss/setup.py +++ b/sklearn/_loss/setup.py @@ -1,6 +1,5 @@ import numpy from numpy.distutils.misc_util import Configuration - from sklearn._build_utils import gen_from_templates diff --git a/sklearn/_loss/tests/test_glm_distribution.py b/sklearn/_loss/tests/test_glm_distribution.py index 35eae4f5f7418..aaaa9de39a502 100644 --- a/sklearn/_loss/tests/test_glm_distribution.py +++ b/sklearn/_loss/tests/test_glm_distribution.py @@ -4,17 +4,20 @@ # # TODO(1.3): remove file import numpy as np -import pytest -from numpy.testing import assert_allclose, assert_array_equal +from numpy.testing import ( + assert_allclose, + assert_array_equal, +) from scipy.optimize import check_grad +import pytest from sklearn._loss.glm_distribution import ( - DistributionBoundary, 
- GammaDistribution, - InverseGaussianDistribution, + TweedieDistribution, NormalDistribution, PoissonDistribution, - TweedieDistribution, + GammaDistribution, + InverseGaussianDistribution, + DistributionBoundary, ) diff --git a/sklearn/_loss/tests/test_link.py b/sklearn/_loss/tests/test_link.py index 4311293aeda41..435361eaa50f1 100644 --- a/sklearn/_loss/tests/test_link.py +++ b/sklearn/_loss/tests/test_link.py @@ -1,8 +1,14 @@ import numpy as np -import pytest from numpy.testing import assert_allclose, assert_array_equal +import pytest + +from sklearn._loss.link import ( + _LINKS, + _inclusive_low_high, + MultinomialLogit, + Interval, +) -from sklearn._loss.link import _LINKS, Interval, MultinomialLogit, _inclusive_low_high LINK_FUNCTIONS = list(_LINKS.values()) diff --git a/sklearn/_loss/tests/test_loss.py b/sklearn/_loss/tests/test_loss.py index 90c7149c092b7..8aeb350440005 100644 --- a/sklearn/_loss/tests/test_loss.py +++ b/sklearn/_loss/tests/test_loss.py @@ -1,17 +1,22 @@ import pickle import numpy as np -import pytest from numpy.testing import assert_allclose, assert_array_equal +import pytest from pytest import approx -from scipy.optimize import LinearConstraint, minimize, minimize_scalar, newton +from scipy.optimize import ( + minimize, + minimize_scalar, + newton, + LinearConstraint, +) from scipy.special import logsumexp -from sklearn._loss.link import IdentityLink, _inclusive_low_high +from sklearn._loss.link import _inclusive_low_high, IdentityLink from sklearn._loss.loss import ( _LOSSES, - AbsoluteError, BaseLoss, + AbsoluteError, HalfBinomialLoss, HalfGammaLoss, HalfMultinomialLoss, @@ -24,6 +29,7 @@ from sklearn.utils import assert_all_finite from sklearn.utils._testing import create_memmap_backed_data, skip_if_32bit + ALL_LOSSES = list(_LOSSES.values()) LOSS_INSTANCES = [loss() for loss in ALL_LOSSES] diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index b85f99869ea66..957e1e01f0551 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -1,6 +1,7 @@ """All minimum dependencies for scikit-learn.""" -import argparse import platform +import argparse + # scipy and cython should by in sync with pyproject.toml diff --git a/sklearn/base.py b/sklearn/base.py index c49624a99a07e..757fb0e23841a 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -4,29 +4,29 @@ # License: BSD 3 clause import copy -import inspect -import platform -import re import warnings from collections import defaultdict +import platform +import inspect +import re import numpy as np from . 
import __version__ from ._config import get_config from .utils import _IS_32BIT -from .utils._estimator_html_repr import estimator_html_repr -from .utils._tags import _DEFAULT_TAGS -from .utils.validation import ( - _check_feature_names_in, - _check_y, - _generate_get_feature_names_out, - _get_feature_names, - _num_features, - check_array, - check_is_fitted, - check_X_y, +from .utils._tags import ( + _DEFAULT_TAGS, ) +from .utils.validation import check_X_y +from .utils.validation import check_array +from .utils.validation import _check_y +from .utils.validation import _num_features +from .utils.validation import _check_feature_names_in +from .utils.validation import _generate_get_feature_names_out +from .utils.validation import check_is_fitted +from .utils._estimator_html_repr import estimator_html_repr +from .utils.validation import _get_feature_names def clone(estimator, *, safe=True): diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0e7a66e0022d2..f84ff898fc2ee 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -8,33 +8,34 @@ # License: BSD 3 clause import warnings -from functools import partial from inspect import signature -from math import log +from functools import partial +from math import log import numpy as np -from scipy.optimize import fmin_bfgs -from scipy.special import expit, xlogy - from joblib import Parallel +from scipy.special import expit +from scipy.special import xlogy +from scipy.optimize import fmin_bfgs + from .base import ( BaseEstimator, ClassifierMixin, - MetaEstimatorMixin, RegressorMixin, clone, + MetaEstimatorMixin, is_classifier, ) -from .isotonic import IsotonicRegression -from .metrics._base import _check_pos_label_consistency -from .metrics._plot.base import _get_response -from .model_selection import check_cv, cross_val_predict -from .preprocessing import LabelEncoder, label_binarize -from .svm import LinearSVC -from .utils import _safe_indexing, check_matplotlib_support, column_or_1d, indexable -from .utils.fixes import delayed +from .preprocessing import label_binarize, LabelEncoder +from .utils import ( + column_or_1d, + indexable, + check_matplotlib_support, +) + from .utils.multiclass import check_classification_targets +from .utils.fixes import delayed from .utils.validation import ( _check_fit_params, _check_sample_weight, @@ -42,6 +43,12 @@ check_consistent_length, check_is_fitted, ) +from .utils import _safe_indexing +from .isotonic import IsotonicRegression +from .svm import LinearSVC +from .model_selection import check_cv, cross_val_predict +from .metrics._base import _check_pos_label_consistency +from .metrics._plot.base import _get_response class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): diff --git a/sklearn/cluster/__init__.py b/sklearn/cluster/__init__.py index 13f42c6aa7a9c..9ba72d341c389 100644 --- a/sklearn/cluster/__init__.py +++ b/sklearn/cluster/__init__.py @@ -3,26 +3,26 @@ algorithms. 
""" -from ._affinity_propagation import AffinityPropagation, affinity_propagation +from ._spectral import spectral_clustering, SpectralClustering +from ._mean_shift import mean_shift, MeanShift, estimate_bandwidth, get_bin_seeds +from ._affinity_propagation import affinity_propagation, AffinityPropagation from ._agglomerative import ( + ward_tree, AgglomerativeClustering, - FeatureAgglomeration, linkage_tree, - ward_tree, + FeatureAgglomeration, ) -from ._bicluster import SpectralBiclustering, SpectralCoclustering -from ._birch import Birch +from ._kmeans import k_means, KMeans, MiniBatchKMeans, kmeans_plusplus from ._bisect_k_means import BisectingKMeans -from ._dbscan import DBSCAN, dbscan -from ._kmeans import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus -from ._mean_shift import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift +from ._dbscan import dbscan, DBSCAN from ._optics import ( OPTICS, cluster_optics_dbscan, - cluster_optics_xi, compute_optics_graph, + cluster_optics_xi, ) -from ._spectral import SpectralClustering, spectral_clustering +from ._bicluster import SpectralBiclustering, SpectralCoclustering +from ._birch import Birch __all__ = [ "AffinityPropagation", diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 3dd8dff4c8981..f0274b113a341 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -10,12 +10,14 @@ import numpy as np -from .._config import config_context -from ..base import BaseEstimator, ClusterMixin from ..exceptions import ConvergenceWarning -from ..metrics import euclidean_distances, pairwise_distances_argmin -from ..utils import as_float_array, check_random_state, check_scalar +from ..base import BaseEstimator, ClusterMixin +from ..utils import as_float_array, check_random_state +from ..utils import check_scalar from ..utils.validation import check_is_fitted +from ..metrics import euclidean_distances +from ..metrics import pairwise_distances_argmin +from .._config import config_context def _equal_similarities_and_preferences(S, preference): diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 4078264144f3e..b399f805a9d40 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -15,9 +15,9 @@ from scipy.sparse.csgraph import connected_components from ..base import BaseEstimator, ClusterMixin, _ClassNamePrefixFeaturesOutMixin +from ..metrics.pairwise import paired_distances from ..metrics import DistanceMetric from ..metrics._dist_metrics import METRIC_MAPPING -from ..metrics.pairwise import paired_distances from ..utils import check_array from ..utils._fast_dict import IntFloatDict from ..utils.graph import _fix_connected_components diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index c336d82dd6a48..a360802009f2c 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -2,19 +2,24 @@ # Authors : Kemal Eren # License: BSD 3 clause -import numbers from abc import ABCMeta, abstractmethod import numpy as np +import numbers + from scipy.linalg import norm from scipy.sparse import dia_matrix, issparse from scipy.sparse.linalg import eigsh, svds +from . 
import KMeans, MiniBatchKMeans from ..base import BaseEstimator, BiclusterMixin -from ..utils import check_random_state, check_scalar +from ..utils import check_random_state +from ..utils import check_scalar + from ..utils.extmath import make_nonnegative, randomized_svd, safe_sparse_dot + from ..utils.validation import assert_all_finite -from ._kmeans import KMeans, MiniBatchKMeans + __all__ = ["SpectralCoclustering", "SpectralBiclustering"] diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index 0e9c2ba616d99..2bfdd2971e4d4 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -3,27 +3,26 @@ # Joel Nothman # License: BSD 3 clause -import numbers import warnings -from math import sqrt - +import numbers import numpy as np from scipy import sparse +from math import sqrt -from .._config import config_context +from ..metrics import pairwise_distances_argmin +from ..metrics.pairwise import euclidean_distances from ..base import ( - BaseEstimator, - ClusterMixin, TransformerMixin, + ClusterMixin, + BaseEstimator, _ClassNamePrefixFeaturesOutMixin, ) -from ..exceptions import ConvergenceWarning -from ..metrics import pairwise_distances_argmin -from ..metrics.pairwise import euclidean_distances -from ..utils import check_scalar, deprecated from ..utils.extmath import row_norms +from ..utils import check_scalar, deprecated from ..utils.validation import check_is_fitted +from ..exceptions import ConvergenceWarning from . import AgglomerativeClustering +from .._config import config_context def _iterate_sparse_X(X): diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index d1e1be281de75..c7dc2c5a772e5 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -6,21 +6,18 @@ import numpy as np import scipy.sparse as sp -from ..utils._openmp_helpers import _openmp_effective_n_threads +from ._kmeans import _BaseKMeans +from ._kmeans import _kmeans_single_elkan +from ._kmeans import _kmeans_single_lloyd +from ._kmeans import _labels_inertia_threadpool_limit +from ._k_means_common import _inertia_dense +from ._k_means_common import _inertia_sparse from ..utils.extmath import row_norms -from ..utils.validation import ( - _check_sample_weight, - _is_arraylike_not_scalar, - check_is_fitted, - check_random_state, -) -from ._k_means_common import _inertia_dense, _inertia_sparse -from ._kmeans import ( - _BaseKMeans, - _kmeans_single_elkan, - _kmeans_single_lloyd, - _labels_inertia_threadpool_limit, -) +from ..utils._openmp_helpers import _openmp_effective_n_threads +from ..utils.validation import check_is_fitted +from ..utils.validation import _check_sample_weight +from ..utils.validation import check_random_state +from ..utils.validation import _is_arraylike_not_scalar class _BisectingTree: diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 7975d7e9833ae..f5d5bc81e6bba 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -8,16 +8,16 @@ # # License: BSD 3 clause +import numpy as np import numbers import warnings - -import numpy as np from scipy import sparse -from ..base import BaseEstimator, ClusterMixin -from ..neighbors import NearestNeighbors from ..utils import check_scalar +from ..base import BaseEstimator, ClusterMixin from ..utils.validation import _check_sample_weight +from ..neighbors import NearestNeighbors + from ._dbscan_inner import dbscan_inner diff --git a/sklearn/cluster/_dbscan_inner.pyx b/sklearn/cluster/_dbscan_inner.pyx index 
22dc29517170f..17ef3f1703a8b 100644 --- a/sklearn/cluster/_dbscan_inner.pyx +++ b/sklearn/cluster/_dbscan_inner.pyx @@ -2,8 +2,8 @@ # Author: Lars Buitinck # License: 3-clause BSD -cimport numpy as cnp from libcpp.vector cimport vector +cimport numpy as cnp cnp.import_array() diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index 4d392de5f7fc0..457a83dd41e71 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -6,10 +6,10 @@ # License: BSD 3 clause import numpy as np -from scipy.sparse import issparse from ..base import TransformerMixin from ..utils.validation import check_is_fitted +from scipy.sparse import issparse ############################################################################### # Mixin class for feature agglomeration. diff --git a/sklearn/cluster/_hierarchical_fast.pyx b/sklearn/cluster/_hierarchical_fast.pyx index b164d678aa788..3ca48c8b7fc2c 100644 --- a/sklearn/cluster/_hierarchical_fast.pyx +++ b/sklearn/cluster/_hierarchical_fast.pyx @@ -1,9 +1,8 @@ # Author: Gael Varoquaux import numpy as np - -cimport cython cimport numpy as cnp +cimport cython ctypedef cnp.float64_t DOUBLE ctypedef cnp.npy_intp INTP @@ -11,15 +10,14 @@ ctypedef cnp.int8_t INT8 cnp.import_array() -# C++ -from cython.operator cimport dereference as deref -from cython.operator cimport preincrement as inc -from libc.math cimport fmax -from libcpp.map cimport map as cpp_map - from ..metrics._dist_metrics cimport DistanceMetric from ..utils._fast_dict cimport IntFloatDict +# C++ +from cython.operator cimport dereference as deref, preincrement as inc +from libcpp.map cimport map as cpp_map +from libc.math cimport fmax + DTYPE = np.float64 ctypedef cnp.float64_t DTYPE_t diff --git a/sklearn/cluster/_k_means_common.pyx b/sklearn/cluster/_k_means_common.pyx index 1cdcaa703f972..69acdb8410ad6 100644 --- a/sklearn/cluster/_k_means_common.pyx +++ b/sklearn/cluster/_k_means_common.pyx @@ -9,13 +9,13 @@ # provided by the user). This is fixed in cython > 0.3. import numpy as np - from cython cimport floating from cython.parallel cimport prange from libc.math cimport sqrt from ..utils.extmath import row_norms + # Number of samples per data chunk defined as a global constant. 
CHUNK_SIZE = 256 diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx index c357ed2d7217a..ab30a500a2a7d 100644 --- a/sklearn/cluster/_k_means_elkan.pyx +++ b/sklearn/cluster/_k_means_elkan.pyx @@ -8,26 +8,20 @@ IF SKLEARN_OPENMP_PARALLELISM_ENABLED: cimport openmp - from cython cimport floating - -from cython.parallel import parallel, prange - +from cython.parallel import prange, parallel from libc.math cimport sqrt from libc.stdlib cimport calloc, free -from libc.string cimport memcpy, memset +from libc.string cimport memset, memcpy from ..utils.extmath import row_norms from ._k_means_common import CHUNK_SIZE - -from ._k_means_common cimport ( - _average_centers, - _center_shift, - _euclidean_dense_dense, - _euclidean_sparse_dense, - _relocate_empty_clusters_dense, - _relocate_empty_clusters_sparse, -) +from ._k_means_common cimport _relocate_empty_clusters_dense +from ._k_means_common cimport _relocate_empty_clusters_sparse +from ._k_means_common cimport _euclidean_dense_dense +from ._k_means_common cimport _euclidean_sparse_dense +from ._k_means_common cimport _average_centers +from ._k_means_common cimport _center_shift def init_bounds_dense( diff --git a/sklearn/cluster/_k_means_lloyd.pyx b/sklearn/cluster/_k_means_lloyd.pyx index e7eb73f3a3613..55600f2910b05 100644 --- a/sklearn/cluster/_k_means_lloyd.pyx +++ b/sklearn/cluster/_k_means_lloyd.pyx @@ -6,27 +6,19 @@ IF SKLEARN_OPENMP_PARALLELISM_ENABLED: cimport openmp - from cython cimport floating - -from cython.parallel import parallel, prange - -from libc.float cimport DBL_MAX, FLT_MAX -from libc.stdlib cimport calloc, free, malloc +from cython.parallel import prange, parallel +from libc.stdlib cimport malloc, calloc, free from libc.string cimport memset +from libc.float cimport DBL_MAX, FLT_MAX from ..utils.extmath import row_norms - -from ..utils._cython_blas cimport NoTrans, RowMajor, Trans, _gemm - +from ..utils._cython_blas cimport _gemm +from ..utils._cython_blas cimport RowMajor, Trans, NoTrans from ._k_means_common import CHUNK_SIZE - -from ._k_means_common cimport ( - _average_centers, - _center_shift, - _relocate_empty_clusters_dense, - _relocate_empty_clusters_sparse, -) +from ._k_means_common cimport _relocate_empty_clusters_dense +from ._k_means_common cimport _relocate_empty_clusters_sparse +from ._k_means_common cimport _average_centers, _center_shift def lloyd_iter_chunked_dense( diff --git a/sklearn/cluster/_k_means_minibatch.pyx b/sklearn/cluster/_k_means_minibatch.pyx index ebfdcf2dac687..b7bd4b1409284 100644 --- a/sklearn/cluster/_k_means_minibatch.pyx +++ b/sklearn/cluster/_k_means_minibatch.pyx @@ -4,7 +4,7 @@ from cython cimport floating from cython.parallel cimport parallel, prange -from libc.stdlib cimport free, malloc +from libc.stdlib cimport malloc, free def _minibatch_update_dense( diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index b6f974d6191a0..eca8a5c2dc3ce 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -11,8 +11,8 @@ # Robert Layton # License: BSD 3 clause -import warnings from abc import ABC, abstractmethod +import warnings import numpy as np import scipy.sparse as sp @@ -23,34 +23,33 @@ TransformerMixin, _ClassNamePrefixFeaturesOutMixin, ) -from ..exceptions import ConvergenceWarning -from ..metrics.pairwise import _euclidean_distances, euclidean_distances -from ..utils import check_array, check_random_state -from ..utils._openmp_helpers import _openmp_effective_n_threads -from ..utils._readonly_array_wrapper 
import ReadonlyArrayWrapper
+from ..metrics.pairwise import euclidean_distances
+from ..metrics.pairwise import _euclidean_distances
 from ..utils.extmath import row_norms, stable_cumsum
-from ..utils.fixes import threadpool_info, threadpool_limits
-from ..utils.sparsefuncs import mean_variance_axis
+from ..utils.fixes import threadpool_limits
+from ..utils.fixes import threadpool_info
 from ..utils.sparsefuncs_fast import assign_rows_csr
-from ..utils.validation import (
-    _check_sample_weight,
-    _is_arraylike_not_scalar,
-    check_is_fitted,
-)
-from ._k_means_common import (
-    CHUNK_SIZE,
-    _inertia_dense,
-    _inertia_sparse,
-    _is_same_clustering,
-)
-from ._k_means_elkan import (
-    elkan_iter_chunked_dense,
-    elkan_iter_chunked_sparse,
-    init_bounds_dense,
-    init_bounds_sparse,
-)
-from ._k_means_lloyd import lloyd_iter_chunked_dense, lloyd_iter_chunked_sparse
-from ._k_means_minibatch import _minibatch_update_dense, _minibatch_update_sparse
+from ..utils.sparsefuncs import mean_variance_axis
+from ..utils import check_array
+from ..utils import check_random_state
+from ..utils.validation import check_is_fitted, _check_sample_weight
+from ..utils.validation import _is_arraylike_not_scalar
+from ..utils._openmp_helpers import _openmp_effective_n_threads
+from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
+from ..exceptions import ConvergenceWarning
+from ._k_means_common import CHUNK_SIZE
+from ._k_means_common import _inertia_dense
+from ._k_means_common import _inertia_sparse
+from ._k_means_common import _is_same_clustering
+from ._k_means_minibatch import _minibatch_update_dense
+from ._k_means_minibatch import _minibatch_update_sparse
+from ._k_means_lloyd import lloyd_iter_chunked_dense
+from ._k_means_lloyd import lloyd_iter_chunked_sparse
+from ._k_means_elkan import init_bounds_dense
+from ._k_means_elkan import init_bounds_sparse
+from ._k_means_elkan import elkan_iter_chunked_dense
+from ._k_means_elkan import elkan_iter_chunked_sparse
+

 ###############################################################################
 # Initialization heuristic
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py
index cfe2b94e9072e..c686ab9895425 100644
--- a/sklearn/cluster/_mean_shift.py
+++ b/sklearn/cluster/_mean_shift.py
@@ -14,20 +14,18 @@
 # Gael Varoquaux
 # Martino Sorbaro

-import warnings
-from collections import defaultdict
-
 import numpy as np
-
+import warnings
 from joblib import Parallel

-from .._config import config_context
+from collections import defaultdict
+from ..utils.validation import check_is_fitted
+from ..utils.fixes import delayed
+from ..utils import check_random_state, gen_batches, check_array
 from ..base import BaseEstimator, ClusterMixin
-from ..metrics.pairwise import pairwise_distances_argmin
 from ..neighbors import NearestNeighbors
-from ..utils import check_array, check_random_state, gen_batches
-from ..utils.fixes import delayed
-from ..utils.validation import check_is_fitted
+from ..metrics.pairwise import pairwise_distances_argmin
+from .._config import config_context


 def estimate_bandwidth(X, *, quantile=0.3, n_samples=None, random_state=0, n_jobs=None):
diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py
index c477de3c50997..a6b159ef5c5a0 100755
--- a/sklearn/cluster/_optics.py
+++ b/sklearn/cluster/_optics.py
@@ -11,17 +11,16 @@
 """

 import warnings
-
 import numpy as np
-from scipy.sparse import SparseEfficiencyWarning, issparse

-from ..base import BaseEstimator, ClusterMixin
 from ..exceptions import DataConversionWarning
-from ..metrics import pairwise_distances
 from ..metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS
-from ..neighbors import NearestNeighbors
 from ..utils import gen_batches, get_chunk_n_rows
 from ..utils.validation import check_memory
+from ..neighbors import NearestNeighbors
+from ..base import BaseEstimator, ClusterMixin
+from ..metrics import pairwise_distances
+from scipy.sparse import issparse, SparseEfficiencyWarning


 class OPTICS(ClusterMixin, BaseEstimator):
diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index 001af1f2a3b5e..390b567c0d0bb 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -10,14 +10,15 @@
 import warnings

 import numpy as np
+
 from scipy.linalg import LinAlgError, qr, svd
 from scipy.sparse import csc_matrix

 from ..base import BaseEstimator, ClusterMixin
-from ..manifold import spectral_embedding
+from ..utils import check_random_state, as_float_array, check_scalar
 from ..metrics.pairwise import pairwise_kernels
-from ..neighbors import NearestNeighbors, kneighbors_graph
-from ..utils import as_float_array, check_random_state, check_scalar
+from ..neighbors import kneighbors_graph, NearestNeighbors
+from ..manifold import spectral_embedding
 from ._kmeans import k_means
diff --git a/sklearn/cluster/tests/common.py b/sklearn/cluster/tests/common.py
index b1fe047fe230a..0f4bd9e14926d 100644
--- a/sklearn/cluster/tests/common.py
+++ b/sklearn/cluster/tests/common.py
@@ -5,6 +5,7 @@

 import numpy as np

+
 ###############################################################################
 # Generate sample data
diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py
index c5f5db6a72b1a..e5dc5d584266d 100644
--- a/sklearn/cluster/tests/test_affinity_propagation.py
+++ b/sklearn/cluster/tests/test_affinity_propagation.py
@@ -3,18 +3,20 @@
 """

-import warnings
-
 import numpy as np
 import pytest
+import warnings
+
 from scipy.sparse import csr_matrix

-from sklearn.cluster import AffinityPropagation, affinity_propagation
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils._testing import assert_array_equal
+
+from sklearn.cluster import AffinityPropagation
 from sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences
+from sklearn.cluster import affinity_propagation
 from sklearn.datasets import make_blobs
-from sklearn.exceptions import ConvergenceWarning
 from sklearn.metrics import euclidean_distances
-from sklearn.utils._testing import assert_array_equal

 n_clusters = 3
 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py
index 38cbbffc84154..184fe3891804e 100644
--- a/sklearn/cluster/tests/test_bicluster.py
+++ b/sklearn/cluster/tests/test_bicluster.py
@@ -4,21 +4,23 @@
 import pytest

 from scipy.sparse import csr_matrix, issparse
+from sklearn.model_selection import ParameterGrid
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+
 from sklearn.base import BaseEstimator, BiclusterMixin
-from sklearn.cluster import SpectralBiclustering, SpectralCoclustering
-from sklearn.cluster._bicluster import (
-    _bistochastic_normalize,
-    _log_normalize,
-    _scale_normalize,
-)
-from sklearn.datasets import make_biclusters, make_checkerboard
+
+from sklearn.cluster import SpectralCoclustering
+from sklearn.cluster import SpectralBiclustering
+from sklearn.cluster._bicluster import _scale_normalize
+from sklearn.cluster._bicluster import _bistochastic_normalize
+from sklearn.cluster._bicluster import _log_normalize
+
 from sklearn.metrics import consensus_score, v_measure_score
-from sklearn.model_selection import ParameterGrid
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+
+from sklearn.datasets import make_biclusters, make_checkerboard


 class MockBiclustering(BiclusterMixin, BaseEstimator):
diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py
index 38b672ceb6348..c5d88c2bc6f0e 100644
--- a/sklearn/cluster/tests/test_birch.py
+++ b/sklearn/cluster/tests/test_birch.py
@@ -2,22 +2,22 @@
 Tests for the birch clustering algorithm.
 """

+from scipy import sparse
 import numpy as np
 import pytest
-from scipy import sparse

-from sklearn.cluster import AgglomerativeClustering, Birch
 from sklearn.cluster.tests.common import generate_clustered_data
+from sklearn.cluster import Birch
+from sklearn.cluster import AgglomerativeClustering
 from sklearn.datasets import make_blobs
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import ElasticNet
 from sklearn.metrics import pairwise_distances_argmin, v_measure_score
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose


 def test_n_samples_leaves_roots():
diff --git a/sklearn/cluster/tests/test_bisect_k_means.py b/sklearn/cluster/tests/test_bisect_k_means.py
index 48ba872d415fb..a9904e61de04b 100644
--- a/sklearn/cluster/tests/test_bisect_k_means.py
+++ b/sklearn/cluster/tests/test_bisect_k_means.py
@@ -2,8 +2,8 @@
 import pytest
 import scipy.sparse as sp

+from sklearn.utils._testing import assert_array_equal, assert_allclose
 from sklearn.cluster import BisectingKMeans
-from sklearn.utils._testing import assert_allclose, assert_array_equal


 @pytest.mark.parametrize("bisecting_strategy", ["biggest_inertia", "largest_cluster"])
diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py
index aec8a8d789cbf..b3b58b7a79b4b 100644
--- a/sklearn/cluster/tests/test_dbscan.py
+++ b/sklearn/cluster/tests/test_dbscan.py
@@ -3,18 +3,23 @@
 """

 import pickle
-import warnings

 import numpy as np
-import pytest
-from scipy import sparse
+
+import warnings
+
 from scipy.spatial import distance
+from scipy import sparse

-from sklearn.cluster import DBSCAN, dbscan
+import pytest
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.neighbors import NearestNeighbors
+from sklearn.cluster import DBSCAN
+from sklearn.cluster import dbscan
 from sklearn.cluster.tests.common import generate_clustered_data
 from sklearn.metrics.pairwise import pairwise_distances
-from sklearn.neighbors import NearestNeighbors
-from sklearn.utils._testing import assert_array_equal
+

 n_clusters = 3
 X = generate_clustered_data(n_clusters=n_clusters)
diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py
index 1f985fae05dc3..3e4aa816b79c0 100644
--- a/sklearn/cluster/tests/test_feature_agglomeration.py
+++ b/sklearn/cluster/tests/test_feature_agglomeration.py
@@ -3,11 +3,11 @@
 """
 # Authors: Sergul Aydore 2017
 import numpy as np
-from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_equal

 from sklearn.cluster import FeatureAgglomeration
-from sklearn.datasets import make_blobs
 from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.datasets import make_blobs


 def test_feature_agglomeration():
diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py
index a478dc3ee6544..c6607779f80fc 100644
--- a/sklearn/cluster/tests/test_hierarchical.py
+++ b/sklearn/cluster/tests/test_hierarchical.py
@@ -6,48 +6,48 @@
 # Matteo Visconti di Oleggio Castello 2014
 # License: BSD 3 clause
 import itertools
+from tempfile import mkdtemp
 import shutil
+import pytest
 from functools import partial
-from tempfile import mkdtemp

 import numpy as np
-import pytest
 from scipy import sparse
 from scipy.cluster import hierarchy
 from scipy.sparse.csgraph import connected_components

-from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration, ward_tree
+from sklearn.metrics.cluster import adjusted_rand_score
+from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS
+from sklearn.utils._testing import assert_almost_equal, create_memmap_backed_data
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import ignore_warnings
+
+from sklearn.cluster import ward_tree
+from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration
 from sklearn.cluster._agglomerative import (
-    _TREE_BUILDERS,
-    _fix_connectivity,
     _hc_cut,
+    _TREE_BUILDERS,
     linkage_tree,
+    _fix_connectivity,
 )
-from sklearn.cluster._hierarchical_fast import (
-    average_merge,
-    max_merge,
-    mst_linkage_core,
-)
-from sklearn.datasets import make_circles, make_moons
 from sklearn.feature_extraction.image import grid_to_graph
 from sklearn.metrics import DistanceMetric
-from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score
 from sklearn.metrics.pairwise import (
     PAIRED_DISTANCES,
     cosine_distances,
     manhattan_distances,
     pairwise_distances,
 )
-from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS
+from sklearn.metrics.cluster import normalized_mutual_info_score
 from sklearn.neighbors import kneighbors_graph
-from sklearn.utils._fast_dict import IntFloatDict
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    create_memmap_backed_data,
-    ignore_warnings,
+from sklearn.cluster._hierarchical_fast import (
+    average_merge,
+    max_merge,
+    mst_linkage_core,
 )
+from sklearn.utils._fast_dict import IntFloatDict
+from sklearn.utils._testing import assert_array_equal
+from sklearn.datasets import make_moons, make_circles


 def test_linkage_misc():
diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
index cab5d9b03512f..93de26c26c320 100644
--- a/sklearn/cluster/tests/test_k_means.py
+++ b/sklearn/cluster/tests/test_k_means.py
@@ -1,31 +1,36 @@
 """Testing for K-means"""
 import re
 import sys
-from io import StringIO

 import numpy as np
-import pytest
 from scipy import sparse as sp
+import pytest
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils.fixes import threadpool_limits

 from sklearn.base import clone
-from sklearn.cluster import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus
-from sklearn.cluster._k_means_common import (
-    _euclidean_dense_dense_wrapper,
-    _euclidean_sparse_dense_wrapper,
-    _inertia_dense,
-    _inertia_sparse,
-    _is_same_clustering,
-    _relocate_empty_clusters_dense,
-    _relocate_empty_clusters_sparse,
-)
-from sklearn.cluster._kmeans import _labels_inertia, _mini_batch_step
-from sklearn.datasets import make_blobs
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.metrics import pairwise_distances, pairwise_distances_argmin
-from sklearn.metrics.cluster import v_measure_score
-from sklearn.utils._testing import assert_allclose, assert_array_equal
+
 from sklearn.utils.extmath import row_norms
-from sklearn.utils.fixes import threadpool_limits
+from sklearn.metrics import pairwise_distances
+from sklearn.metrics import pairwise_distances_argmin
+from sklearn.metrics.cluster import v_measure_score
+from sklearn.cluster import KMeans, k_means, kmeans_plusplus
+from sklearn.cluster import MiniBatchKMeans
+from sklearn.cluster._kmeans import _labels_inertia
+from sklearn.cluster._kmeans import _mini_batch_step
+from sklearn.cluster._k_means_common import _relocate_empty_clusters_dense
+from sklearn.cluster._k_means_common import _relocate_empty_clusters_sparse
+from sklearn.cluster._k_means_common import _euclidean_dense_dense_wrapper
+from sklearn.cluster._k_means_common import _euclidean_sparse_dense_wrapper
+from sklearn.cluster._k_means_common import _inertia_dense
+from sklearn.cluster._k_means_common import _inertia_sparse
+from sklearn.cluster._k_means_common import _is_same_clustering
+from sklearn.datasets import make_blobs
+from io import StringIO
+

 # non centered, sparse centers to check the
 centers = np.array(
diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py
index f04e4ab7e23ef..cdd1134156173 100644
--- a/sklearn/cluster/tests/test_mean_shift.py
+++ b/sklearn/cluster/tests/test_mean_shift.py
@@ -3,20 +3,23 @@
 """

-import warnings
-
 import numpy as np
+import warnings
 import pytest
+
 from scipy import sparse

-from sklearn.cluster import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose
+
+from sklearn.cluster import MeanShift
+from sklearn.cluster import mean_shift
+from sklearn.cluster import estimate_bandwidth
+from sklearn.cluster import get_bin_seeds
 from sklearn.datasets import make_blobs
 from sklearn.metrics import v_measure_score
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+

 n_clusters = 3
 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py
index 66c7f127cd00a..6de9e9c656e22 100644
--- a/sklearn/cluster/tests/test_optics.py
+++ b/sklearn/cluster/tests/test_optics.py
@@ -1,21 +1,24 @@
 # Authors: Shane Grigsby
 # Adrin Jalali
 # License: BSD 3 clause
-import warnings
-
 import numpy as np
 import pytest
 from scipy import sparse
+import warnings

-from sklearn.cluster import DBSCAN, OPTICS
-from sklearn.cluster._optics import _extend_region, _extract_xi_labels
-from sklearn.cluster.tests.common import generate_clustered_data
 from sklearn.datasets import make_blobs
-from sklearn.exceptions import DataConversionWarning, EfficiencyWarning
+from sklearn.cluster import OPTICS
+from sklearn.cluster._optics import _extend_region, _extract_xi_labels
+from sklearn.exceptions import DataConversionWarning
 from sklearn.metrics.cluster import contingency_matrix
 from sklearn.metrics.pairwise import pairwise_distances
+from sklearn.cluster import DBSCAN
 from sklearn.utils import shuffle
-from sklearn.utils._testing import assert_allclose, assert_array_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.exceptions import EfficiencyWarning
+from sklearn.cluster.tests.common import generate_clustered_data
+

 rng = np.random.RandomState(0)
 n_points_per_cluster = 10
diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py
index 29c6a01279bc2..19e92101ef7d4 100644
--- a/sklearn/cluster/tests/test_spectral.py
+++ b/sklearn/cluster/tests/test_spectral.py
@@ -1,21 +1,25 @@
 """Testing for Spectral Clustering methods"""
-import pickle
 import re

 import numpy as np
-import pytest
 from scipy import sparse
 from scipy.linalg import LinAlgError
+import pytest
+
+import pickle
+
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import assert_array_equal

 from sklearn.cluster import SpectralClustering, spectral_clustering
-from sklearn.cluster._spectral import cluster_qr, discretize
-from sklearn.datasets import make_blobs
+from sklearn.cluster._spectral import discretize, cluster_qr
 from sklearn.feature_extraction import img_to_graph
-from sklearn.metrics import adjusted_rand_score, pairwise_distances
+from sklearn.metrics import pairwise_distances
+from sklearn.metrics import adjusted_rand_score
 from sklearn.metrics.pairwise import kernel_metrics, rbf_kernel
 from sklearn.neighbors import NearestNeighbors
-from sklearn.utils import check_random_state
-from sklearn.utils._testing import assert_array_equal
+from sklearn.datasets import make_blobs

 try:
     from pyamg import smoothed_aggregation_solver  # noqa
diff --git a/sklearn/compose/__init__.py b/sklearn/compose/__init__.py
index 7b137cdf9e07f..8be8d17040e82 100644
--- a/sklearn/compose/__init__.py
+++ b/sklearn/compose/__init__.py
@@ -7,11 +7,12 @@
 from ._column_transformer import (
     ColumnTransformer,
-    make_column_selector,
     make_column_transformer,
+    make_column_selector,
 )
 from ._target import TransformedTargetRegressor

+
 __all__ = [
     "ColumnTransformer",
     "make_column_transformer",
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index b376b2f8fbc29..15f1424498856 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -3,27 +3,28 @@
 to work with heterogeneous data and to apply different transformers to
 different columns.
 """
-from collections import Counter
-
 # Author: Andreas Mueller
 # Joris Van den Bossche
 # License: BSD
 from itertools import chain
+from collections import Counter

 import numpy as np
 from scipy import sparse
-
 from joblib import Parallel

-from ..base import TransformerMixin, clone
-from ..pipeline import _fit_transform_one, _name_estimators, _transform_one
-from ..preprocessing import FunctionTransformer
-from ..utils import Bunch, _get_column_indices, _safe_indexing
+from ..base import clone, TransformerMixin
 from ..utils._estimator_html_repr import _VisualBlock
+from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
+from ..preprocessing import FunctionTransformer
+from ..utils import Bunch
+from ..utils import _safe_indexing
+from ..utils import _get_column_indices
 from ..utils.deprecation import deprecated
-from ..utils.fixes import delayed
 from ..utils.metaestimators import _BaseComposition
-from ..utils.validation import _check_feature_names_in, check_array, check_is_fitted
+from ..utils.validation import check_array, check_is_fitted, _check_feature_names_in
+from ..utils.fixes import delayed
+

 __all__ = ["ColumnTransformer", "make_column_transformer", "make_column_selector"]
diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py
index 7c5cb40a3a851..74a826f031a83 100644
--- a/sklearn/compose/_target.py
+++ b/sklearn/compose/_target.py
@@ -7,11 +7,11 @@
 import numpy as np

 from ..base import BaseEstimator, RegressorMixin, clone
-from ..exceptions import NotFittedError
-from ..preprocessing import FunctionTransformer
-from ..utils import _safe_indexing, check_array
-from ..utils._tags import _safe_tags
 from ..utils.validation import check_is_fitted
+from ..utils._tags import _safe_tags
+from ..utils import check_array, _safe_indexing
+from ..preprocessing import FunctionTransformer
+from ..exceptions import NotFittedError

 __all__ = ["TransformedTargetRegressor"]
diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index a8861a23bdbd0..7b2c0b91cccdf 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -1,33 +1,28 @@
 """
 Test the ColumnTransformer.
 """
-import pickle
 import re
+import pickle

 import numpy as np
+from scipy import sparse
 import pytest
+
 from numpy.testing import assert_allclose
-from scipy import sparse
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose_dense_sparse
+from sklearn.utils._testing import assert_almost_equal

 from sklearn.base import BaseEstimator
 from sklearn.compose import (
     ColumnTransformer,
-    make_column_selector,
     make_column_transformer,
+    make_column_selector,
 )
 from sklearn.exceptions import NotFittedError
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder
 from sklearn.feature_extraction import DictVectorizer
-from sklearn.preprocessing import (
-    FunctionTransformer,
-    Normalizer,
-    OneHotEncoder,
-    StandardScaler,
-)
-from sklearn.utils._testing import (
-    assert_allclose_dense_sparse,
-    assert_almost_equal,
-    assert_array_equal,
-)


 class Trans(BaseEstimator):
diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py
index 53242b7e0277b..f0d63c00c2772 100644
--- a/sklearn/compose/tests/test_target.py
+++ b/sklearn/compose/tests/test_target.py
@@ -1,14 +1,25 @@
 import numpy as np
 import pytest

-from sklearn import datasets
-from sklearn.base import BaseEstimator, TransformerMixin, clone
-from sklearn.compose import TransformedTargetRegressor
+from sklearn.base import clone
+from sklearn.base import BaseEstimator
+from sklearn.base import TransformerMixin
+
 from sklearn.dummy import DummyRegressor
-from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit
+
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_no_warnings
+
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.preprocessing import StandardScaler
+
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import FunctionTransformer, StandardScaler
-from sklearn.utils._testing import assert_allclose, assert_no_warnings
+
+from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit
+
+from sklearn import datasets
+
+from sklearn.compose import TransformedTargetRegressor

 friedman = datasets.make_friedman1(random_state=0)
diff --git a/sklearn/conftest.py b/sklearn/conftest.py
index ca5d4f2ade23d..27ac720cbfe2e 100644
--- a/sklearn/conftest.py
+++ b/sklearn/conftest.py
@@ -1,27 +1,26 @@
+from os import environ
+from functools import wraps
 import platform
 import sys
-from functools import wraps
-from os import environ

-import numpy as np
 import pytest
-from _pytest.doctest import DoctestItem
+import numpy as np
 from threadpoolctl import threadpool_limits
+from _pytest.doctest import DoctestItem

-from sklearn._min_dependencies import PYTEST_MIN_VERSION
-from sklearn.datasets import (
-    fetch_20newsgroups,
-    fetch_20newsgroups_vectorized,
-    fetch_california_housing,
-    fetch_covtype,
-    fetch_kddcup99,
-    fetch_olivetti_faces,
-    fetch_rcv1,
-)
-from sklearn.tests import random_seed
 from sklearn.utils import _IS_32BIT
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
+from sklearn._min_dependencies import PYTEST_MIN_VERSION
 from sklearn.utils.fixes import parse_version
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.datasets import fetch_20newsgroups_vectorized
+from sklearn.datasets import fetch_california_housing
+from sklearn.datasets import fetch_covtype
+from sklearn.datasets import fetch_kddcup99
+from sklearn.datasets import fetch_olivetti_faces
+from sklearn.datasets import fetch_rcv1
+from sklearn.tests import random_seed
+

 if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION):
     raise ImportError(
diff --git a/sklearn/covariance/__init__.py b/sklearn/covariance/__init__.py
index 8fcf8c68444e5..011fde3647145 100644
--- a/sklearn/covariance/__init__.py
+++ b/sklearn/covariance/__init__.py
@@ -6,23 +6,24 @@
 Models.
 """

-from ._elliptic_envelope import EllipticEnvelope
 from ._empirical_covariance import (
-    EmpiricalCovariance,
     empirical_covariance,
+    EmpiricalCovariance,
     log_likelihood,
 )
-from ._graph_lasso import GraphicalLasso, GraphicalLassoCV, graphical_lasso
-from ._robust_covariance import MinCovDet, fast_mcd
 from ._shrunk_covariance import (
-    OAS,
-    LedoitWolf,
+    shrunk_covariance,
     ShrunkCovariance,
     ledoit_wolf,
     ledoit_wolf_shrinkage,
+    LedoitWolf,
     oas,
-    shrunk_covariance,
+    OAS,
 )
+from ._robust_covariance import fast_mcd, MinCovDet
+from ._graph_lasso import graphical_lasso, GraphicalLasso, GraphicalLassoCV
+from ._elliptic_envelope import EllipticEnvelope
+

 __all__ = [
     "EllipticEnvelope",
diff --git a/sklearn/covariance/_elliptic_envelope.py b/sklearn/covariance/_elliptic_envelope.py
index 412f2e57e39ac..31f76fc30ca30 100644
--- a/sklearn/covariance/_elliptic_envelope.py
+++ b/sklearn/covariance/_elliptic_envelope.py
@@ -3,11 +3,10 @@
 # License: BSD 3 clause

 import numpy as np
-
-from ..base import OutlierMixin
-from ..metrics import accuracy_score
+from . import MinCovDet
 from ..utils.validation import check_is_fitted
-from ._robust_covariance import MinCovDet
+from ..metrics import accuracy_score
+from ..base import OutlierMixin


 class EllipticEnvelope(OutlierMixin, MinCovDet):
diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py
index a1628e6a581f5..4362a14f04f6e 100644
--- a/sklearn/covariance/_empirical_covariance.py
+++ b/sklearn/covariance/_empirical_covariance.py
@@ -11,15 +11,14 @@
 # avoid division truncation
 import warnings
-
 import numpy as np
 from scipy import linalg

 from .. import config_context
 from ..base import BaseEstimator
-from ..metrics.pairwise import pairwise_distances
 from ..utils import check_array
 from ..utils.extmath import fast_logdet
+from ..metrics.pairwise import pairwise_distances


 def log_likelihood(emp_cov, precision):
diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py
index 14a8018f358c8..de56faa8d70e1 100644
--- a/sklearn/covariance/_graph_lasso.py
+++ b/sklearn/covariance/_graph_lasso.py
@@ -2,29 +2,28 @@
 estimator.
 """
-import operator
-import sys
-import time
-
 # Author: Gael Varoquaux
 # License: BSD 3 clause
 # Copyright: INRIA
 import warnings
+import operator
+import sys
+import time

 import numpy as np
 from scipy import linalg
-
 from joblib import Parallel

+from . import empirical_covariance, EmpiricalCovariance, log_likelihood
+
 from ..exceptions import ConvergenceWarning
+from ..utils.validation import _is_arraylike_not_scalar, check_random_state
+from ..utils.fixes import delayed

 # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast'
 from ..linear_model import _cd_fast as cd_fast  # type: ignore
 from ..linear_model import lars_path_gram
 from ..model_selection import check_cv, cross_val_score
-from ..utils.fixes import delayed
-from ..utils.validation import _is_arraylike_not_scalar, check_random_state
-from . import EmpiricalCovariance, empirical_covariance, log_likelihood


 # Helper functions to compute the objective and dual objective functions
diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py
index 2133b8d673bc8..2122dd8075a1d 100644
--- a/sklearn/covariance/_robust_covariance.py
+++ b/sklearn/covariance/_robust_covariance.py
@@ -8,16 +8,15 @@
 #
 # License: BSD 3 clause

-import numbers
 import warnings
-
+import numbers
 import numpy as np
 from scipy import linalg
 from scipy.stats import chi2

-from ..utils import check_array, check_random_state
+from . import empirical_covariance, EmpiricalCovariance
 from ..utils.extmath import fast_logdet
-from ._empirical_covariance import EmpiricalCovariance, empirical_covariance
+from ..utils import check_random_state, check_array


 # Minimum Covariance Determinant
diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py
index 539c746e28ad7..64fce5b1db6f6 100644
--- a/sklearn/covariance/_shrunk_covariance.py
+++ b/sklearn/covariance/_shrunk_covariance.py
@@ -14,12 +14,12 @@
 # avoid division truncation
 import warnings
-
 import numpy as np

+from . import empirical_covariance, EmpiricalCovariance
 from .._config import config_context
 from ..utils import check_array
-from . import EmpiricalCovariance, empirical_covariance
+

 # ShrunkCovariance estimator
diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py
index 07b144e631f55..6a9031d0fcb36 100644
--- a/sklearn/covariance/tests/test_covariance.py
+++ b/sklearn/covariance/tests/test_covariance.py
@@ -7,22 +7,21 @@
 import numpy as np
 import pytest

+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+
 from sklearn import datasets
 from sklearn.covariance import (
-    OAS,
+    empirical_covariance,
     EmpiricalCovariance,
-    LedoitWolf,
     ShrunkCovariance,
-    empirical_covariance,
+    shrunk_covariance,
+    LedoitWolf,
     ledoit_wolf,
     ledoit_wolf_shrinkage,
+    OAS,
     oas,
-    shrunk_covariance,
-)
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
 )

 X, _ = datasets.load_diabetes(return_X_y=True)
diff --git a/sklearn/covariance/tests/test_elliptic_envelope.py b/sklearn/covariance/tests/test_elliptic_envelope.py
index f547e09a7f0d6..90c059602bdae 100644
--- a/sklearn/covariance/tests/test_elliptic_envelope.py
+++ b/sklearn/covariance/tests/test_elliptic_envelope.py
@@ -6,12 +6,10 @@
 import pytest

 from sklearn.covariance import EllipticEnvelope
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
 from sklearn.exceptions import NotFittedError
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)


 def test_elliptic_envelope():
diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py
index fbf894259197d..dc099deac8fe3 100644
--- a/sklearn/covariance/tests/test_graphical_lasso.py
+++ b/sklearn/covariance/tests/test_graphical_lasso.py
@@ -1,27 +1,26 @@
 """ Test the graphical_lasso module.
 """
 import sys
-from io import StringIO
+import pytest

 import numpy as np
-import pytest
-from numpy.testing import assert_allclose
 from scipy import linalg

-from sklearn import datasets
+from numpy.testing import assert_allclose
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_less
+from sklearn.utils._testing import _convert_container
+
 from sklearn.covariance import (
+    graphical_lasso,
     GraphicalLasso,
     GraphicalLassoCV,
     empirical_covariance,
-    graphical_lasso,
 )
 from sklearn.datasets import make_sparse_spd_matrix
+from io import StringIO
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    _convert_container,
-    assert_array_almost_equal,
-    assert_array_less,
-)
+from sklearn import datasets


 def test_graphical_lasso(random_state=0):
diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py
index 0b2a2bbbff815..9bb93328b17a2 100644
--- a/sklearn/covariance/tests/test_robust_covariance.py
+++ b/sklearn/covariance/tests/test_robust_covariance.py
@@ -9,10 +9,12 @@
 import numpy as np
 import pytest

-from sklearn import datasets
-from sklearn.covariance import MinCovDet, empirical_covariance, fast_mcd
 from sklearn.utils._testing import assert_array_almost_equal

+from sklearn import datasets
+from sklearn.covariance import empirical_covariance, MinCovDet
+from sklearn.covariance import fast_mcd
+
 X = datasets.load_iris().data
 X_1d = X[:, 0]
 n_samples, n_features = X.shape
diff --git a/sklearn/cross_decomposition/__init__.py b/sklearn/cross_decomposition/__init__.py
index 47b78783caf9c..ec2f5fb3049af 100644
--- a/sklearn/cross_decomposition/__init__.py
+++ b/sklearn/cross_decomposition/__init__.py
@@ -1,3 +1,3 @@
-from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression
+from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA

 __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"]
diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py
index 16ba9b2d38dce..8a804142e13bb 100644
--- a/sklearn/cross_decomposition/_pls.py
+++ b/sklearn/cross_decomposition/_pls.py
@@ -12,18 +12,15 @@
 import numpy as np
 from scipy.linalg import svd

-from ..base import (
-    BaseEstimator,
-    MultiOutputMixin,
-    RegressorMixin,
-    TransformerMixin,
-    _ClassNamePrefixFeaturesOutMixin,
-)
-from ..exceptions import ConvergenceWarning
-from ..utils import check_array, check_consistent_length, check_scalar
+from ..base import BaseEstimator, RegressorMixin, TransformerMixin
+from ..base import MultiOutputMixin
+from ..base import _ClassNamePrefixFeaturesOutMixin
+from ..utils import check_array, check_scalar, check_consistent_length
+from ..utils.fixes import sp_version
+from ..utils.fixes import parse_version
 from ..utils.extmath import svd_flip
-from ..utils.fixes import parse_version, sp_version
-from ..utils.validation import FLOAT_DTYPES, check_is_fitted
+from ..utils.validation import check_is_fitted, FLOAT_DTYPES
+from ..exceptions import ConvergenceWarning

 __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"]
diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py
index df37458fc1150..57e8a2c20abc7 100644
--- a/sklearn/cross_decomposition/tests/test_pls.py
+++ b/sklearn/cross_decomposition/tests/test_pls.py
@@ -1,20 +1,21 @@
+import pytest
 import warnings
-
 import numpy as np
-import pytest
-from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal
+from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose

-from sklearn.cross_decomposition import CCA, PLSSVD, PLSCanonical, PLSRegression
+from sklearn.datasets import load_linnerud
 from sklearn.cross_decomposition._pls import (
     _center_scale_xy,
     _get_first_singular_vectors_power_method,
     _get_first_singular_vectors_svd,
     _svd_flip_1d,
 )
-from sklearn.datasets import load_linnerud, make_regression
-from sklearn.exceptions import ConvergenceWarning
+from sklearn.cross_decomposition import CCA
+from sklearn.cross_decomposition import PLSSVD, PLSRegression, PLSCanonical
+from sklearn.datasets import make_regression
 from sklearn.utils import check_random_state
 from sklearn.utils.extmath import svd_flip
+from sklearn.exceptions import ConvergenceWarning


 def assert_matrix_orthogonal(M):
diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py
index 9afb74e677394..42f7b2f12ac0e 100644
--- a/sklearn/datasets/__init__.py
+++ b/sklearn/datasets/__init__.py
@@ -3,56 +3,53 @@
 including methods to load and fetch popular reference datasets. It also
 features some artificial data generators.
 """
-from ._base import (
-    clear_data_home,
-    get_data_home,
-    load_boston,
-    load_breast_cancer,
-    load_diabetes,
-    load_digits,
-    load_files,
-    load_iris,
-    load_linnerud,
-    load_sample_image,
-    load_sample_images,
-    load_wine,
-)
-from ._california_housing import fetch_california_housing
+from ._base import load_breast_cancer
+from ._base import load_boston
+from ._base import load_diabetes
+from ._base import load_digits
+from ._base import load_files
+from ._base import load_iris
+from ._base import load_linnerud
+from ._base import load_sample_images
+from ._base import load_sample_image
+from ._base import load_wine
+from ._base import get_data_home
+from ._base import clear_data_home
 from ._covtype import fetch_covtype
 from ._kddcup99 import fetch_kddcup99
-from ._lfw import fetch_lfw_pairs, fetch_lfw_people
-from ._olivetti_faces import fetch_olivetti_faces
+from ._lfw import fetch_lfw_pairs
+from ._lfw import fetch_lfw_people
+from ._twenty_newsgroups import fetch_20newsgroups
+from ._twenty_newsgroups import fetch_20newsgroups_vectorized
 from ._openml import fetch_openml
-from ._rcv1 import fetch_rcv1
-from ._samples_generator import (
-    make_biclusters,
-    make_blobs,
-    make_checkerboard,
-    make_circles,
-    make_classification,
-    make_friedman1,
-    make_friedman2,
-    make_friedman3,
-    make_gaussian_quantiles,
-    make_hastie_10_2,
-    make_low_rank_matrix,
-    make_moons,
-    make_multilabel_classification,
-    make_regression,
-    make_s_curve,
-    make_sparse_coded_signal,
-    make_sparse_spd_matrix,
-    make_sparse_uncorrelated,
-    make_spd_matrix,
-    make_swiss_roll,
-)
+from ._samples_generator import make_classification
+from ._samples_generator import make_multilabel_classification
+from ._samples_generator import make_hastie_10_2
+from ._samples_generator import make_regression
+from ._samples_generator import make_blobs
+from ._samples_generator import make_moons
+from ._samples_generator import make_circles
+from ._samples_generator import make_friedman1
+from ._samples_generator import make_friedman2
+from ._samples_generator import make_friedman3
+from ._samples_generator import make_low_rank_matrix
+from ._samples_generator import make_sparse_coded_signal
+from ._samples_generator import make_sparse_uncorrelated
+from ._samples_generator import make_spd_matrix
+from ._samples_generator import make_swiss_roll
+from ._samples_generator import make_s_curve
+from ._samples_generator import make_sparse_spd_matrix
+from ._samples_generator import make_gaussian_quantiles
+from ._samples_generator import make_biclusters
+from ._samples_generator import make_checkerboard
+from ._svmlight_format_io import load_svmlight_file
+from ._svmlight_format_io import load_svmlight_files
+from ._svmlight_format_io import dump_svmlight_file
+from ._olivetti_faces import fetch_olivetti_faces
 from ._species_distributions import fetch_species_distributions
-from ._svmlight_format_io import (
-    dump_svmlight_file,
-    load_svmlight_file,
-    load_svmlight_files,
-)
-from ._twenty_newsgroups import fetch_20newsgroups, fetch_20newsgroups_vectorized
+from ._california_housing import fetch_california_housing
+from ._rcv1 import fetch_rcv1
+

 __all__ = [
     "clear_data_home",
diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py
index ed30dde25b32c..b5603853491a2 100644
--- a/sklearn/datasets/_arff_parser.py
+++ b/sklearn/datasets/_arff_parser.py
@@ -9,7 +9,11 @@
 from ..externals import _arff
 from ..externals._arff import ArffSparseDataType
-from ..utils import _chunk_generator, check_pandas_support, get_chunk_n_rows
+from ..utils import (
+    _chunk_generator,
+    check_pandas_support,
+    get_chunk_n_rows,
+)


 def _split_sparse_columns(
diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py
index 4b545f2f84985..367816fa4a467 100644
--- a/sklearn/datasets/_base.py
+++ b/sklearn/datasets/_base.py
@@ -7,23 +7,26 @@
 # 2010 Olivier Grisel
 # License: BSD 3 clause
 import csv
-import gzip
 import hashlib
-import os
+import gzip
 import shutil
 from collections import namedtuple
-from importlib import resources
+import os
 from os import environ, listdir, makedirs
 from os.path import expanduser, isdir, join, splitext
+from importlib import resources
 from pathlib import Path
-from urllib.request import urlretrieve
-
-import numpy as np

 from ..preprocessing import scale
-from ..utils import Bunch, check_pandas_support, check_random_state
+from ..utils import Bunch
+from ..utils import check_random_state
+from ..utils import check_pandas_support
 from ..utils.deprecation import deprecated

+import numpy as np
+
+from urllib.request import urlretrieve
+

 DATA_MODULE = "sklearn.datasets.data"
 DESCR_MODULE = "sklearn.datasets.descr"
 IMAGES_MODULE = "sklearn.datasets.images"
diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py
index a2eeae69049e5..f3f7d0e57c502 100644
--- a/sklearn/datasets/_california_housing.py
+++ b/sklearn/datasets/_california_housing.py
@@ -21,24 +21,23 @@
 # Authors: Peter Prettenhofer
 # License: BSD 3 clause

-import logging
-import tarfile
-from os import makedirs, remove
 from os.path import exists
+from os import makedirs, remove
+import tarfile

 import numpy as np
+import logging

 import joblib

-from ..utils import Bunch
 from . import get_data_home
-from ._base import (
-    RemoteFileMetadata,
-    _convert_data_dataframe,
-    _fetch_remote,
-    _pkl_filepath,
-    load_descr,
-)
+from ._base import _convert_data_dataframe
+from ._base import _fetch_remote
+from ._base import _pkl_filepath
+from ._base import RemoteFileMetadata
+from ._base import load_descr
+from ..utils import Bunch
+

 # The original data can be found at:
 # https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz
diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 5c098060da4bb..b43ea24141eed 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -14,25 +14,24 @@
 # Peter Prettenhofer
 # License: BSD 3 clause

-import logging
-import os
 from gzip import GzipFile
+import logging
 from os.path import exists, join
+import os
 from tempfile import TemporaryDirectory

 import numpy as np
-
 import joblib

-from ..utils import Bunch, check_random_state
 from . import get_data_home
-from ._base import (
-    RemoteFileMetadata,
-    _convert_data_dataframe,
-    _fetch_remote,
-    _pkl_filepath,
-    load_descr,
-)
+from ._base import _convert_data_dataframe
+from ._base import _fetch_remote
+from ._base import RemoteFileMetadata
+from ._base import load_descr
+from ..utils import Bunch
+from ._base import _pkl_filepath
+from ..utils import check_random_state
+

 # The original data can be found in:
 # https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz
diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py
index a2efb8569b944..b698d299b7c8d 100644
--- a/sklearn/datasets/_kddcup99.py
+++ b/sklearn/datasets/_kddcup99.py
@@ -9,24 +9,23 @@
 """

 import errno
+from gzip import GzipFile
 import logging
 import os
-from gzip import GzipFile
 from os.path import exists, join

 import numpy as np
-
 import joblib

-from ..utils import Bunch, check_random_state
-from ..utils import shuffle as shuffle_method
+from ._base import _fetch_remote
+from ._base import _convert_data_dataframe
 from . import get_data_home
-from ._base import (
-    RemoteFileMetadata,
-    _convert_data_dataframe,
-    _fetch_remote,
-    load_descr,
-)
+from ._base import RemoteFileMetadata
+from ._base import load_descr
+from ..utils import Bunch
+from ..utils import check_random_state
+from ..utils import shuffle as shuffle_method
+

 # The original data can be found at:
 # https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz
diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py
index 00b5bc35d1eb5..dc1267af59f96 100644
--- a/sklearn/datasets/_lfw.py
+++ b/sklearn/datasets/_lfw.py
@@ -8,16 +8,21 @@
 # Copyright (c) 2011 Olivier Grisel
 # License: BSD 3 clause

-import logging
 from os import listdir, makedirs, remove
-from os.path import exists, isdir, join
+from os.path import join, exists, isdir

-import numpy as np
+import logging
+import numpy as np

 from joblib import Memory

+from ._base import (
+    get_data_home,
+    _fetch_remote,
+    RemoteFileMetadata,
+    load_descr,
+)
 from ..utils import Bunch
-from ._base import RemoteFileMetadata, _fetch_remote, get_data_home, load_descr

 logger = logging.getLogger(__name__)
diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py
index fb6e78300ebe9..296a3868081d9 100644
--- a/sklearn/datasets/_olivetti_faces.py
+++ b/sklearn/datasets/_olivetti_faces.py
@@ -13,17 +13,19 @@
 # Copyright (c) 2011 David Warde-Farley
 # License: BSD 3 clause

-from os import makedirs, remove
 from os.path import exists
+from os import makedirs, remove

 import numpy as np
 from scipy.io import loadmat
-
 import joblib

-from ..utils import Bunch, check_random_state
 from . import get_data_home
-from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr
+from ._base import _fetch_remote
+from ._base import RemoteFileMetadata
+from ._base import _pkl_filepath
+from ._base import load_descr
+from ..utils import check_random_state, Bunch

 # The original data can be found at:
 # https://cs.nyu.edu/~roweis/data/olivettifaces.mat
diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py
index 5a32a78dc9444..5299d4d7019a0 100644
--- a/sklearn/datasets/_openml.py
+++ b/sklearn/datasets/_openml.py
@@ -7,18 +7,18 @@
 from contextlib import closing
 from functools import wraps
 from os.path import join
+from typing import Callable, Optional, Dict, Tuple, List, Any, Union
 from tempfile import TemporaryDirectory
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from urllib.error import HTTPError, URLError
-from urllib.request import Request, urlopen
+from urllib.request import urlopen, Request
 from warnings import warn

 import numpy as np

-from ..utils import check_pandas_support  # noqa
-from ..utils import Bunch
 from . import get_data_home
 from ._arff_parser import load_arff_from_gzip_file
+from ..utils import Bunch
+from ..utils import check_pandas_support  # noqa

 __all__ = ["fetch_openml"]
diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py
index 20618ff5f944a..cca30afefff34 100644
--- a/sklearn/datasets/_rcv1.py
+++ b/sklearn/datasets/_rcv1.py
@@ -9,20 +9,24 @@
 # License: BSD 3 clause

 import logging
-from gzip import GzipFile
-from os import makedirs, remove
+
+from os import remove, makedirs
 from os.path import exists, join
+from gzip import GzipFile

 import numpy as np
 import scipy.sparse as sp
-
 import joblib

-from ..utils import Bunch
-from ..utils import shuffle as shuffle_
 from . import get_data_home
-from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr
+from ._base import _pkl_filepath
+from ._base import _fetch_remote
+from ._base import RemoteFileMetadata
+from ._base import load_descr
 from ._svmlight_format_io import load_svmlight_files
+from ..utils import shuffle as shuffle_
+from ..utils import Bunch
+

 # The original vectorized data can be found at:
 # http://www.ai.mit.edu/projects/jmlr/papers/volume5/lewis04a/a13-vector-files/lyrl2004_vectors_test_pt0.dat.gz
diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py
index 71e39799daeae..acc7a6e43b06c 100644
--- a/sklearn/datasets/_samples_generator.py
+++ b/sklearn/datasets/_samples_generator.py
@@ -6,14 +6,14 @@
 # G. Louppe, J. Nothman
 # License: BSD 3 clause

-import array
 import numbers
+import array
 import warnings
 from collections.abc import Iterable

 import numpy as np
-import scipy.sparse as sp
 from scipy import linalg
+import scipy.sparse as sp

 from ..preprocessing import MultiLabelBinarizer
 from ..utils import check_array, check_random_state
diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py
index 6406d09ce5cd4..9afc6e08cd6cb 100644
--- a/sklearn/datasets/_species_distributions.py
+++ b/sklearn/datasets/_species_distributions.py
@@ -37,18 +37,20 @@
 #
 # License: BSD 3 clause

-import logging
 from io import BytesIO
 from os import makedirs, remove
 from os.path import exists
+import logging

 import numpy as np

 import joblib

-from ..utils import Bunch
 from . import get_data_home
-from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath
+from ._base import _fetch_remote
+from ._base import RemoteFileMetadata
+from ..utils import Bunch
+from ._base import _pkl_filepath

 # The original data can be found at:
 # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip
diff --git a/sklearn/datasets/_svmlight_format_fast.pyx b/sklearn/datasets/_svmlight_format_fast.pyx
index 845d948de9b05..bf5554714c199 100644
--- a/sklearn/datasets/_svmlight_format_fast.pyx
+++ b/sklearn/datasets/_svmlight_format_fast.pyx
@@ -6,9 +6,8 @@
 # License: BSD 3 clause

 import array
-
-cimport cython
 from cpython cimport array
+cimport cython
 from libc.string cimport strchr

 import numpy as np
diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py
index 47aec0dc0e9d3..a3d1a6e587457 100644
--- a/sklearn/datasets/_svmlight_format_io.py
+++ b/sklearn/datasets/_svmlight_format_io.py
@@ -15,15 +15,16 @@
 # Olivier Grisel
 # License: BSD 3 clause

+from contextlib import closing
 import io
 import os.path
-from contextlib import closing

 import numpy as np
 import scipy.sparse as sp

 from .. import __version__
+
-from ..utils import IS_PYPY, check_array
+from ..utils import check_array, IS_PYPY

 if not IS_PYPY:
     from ._svmlight_format_fast import _load_svmlight_file
diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py
index 7453e73646719..22bea7e59482d 100644
--- a/sklearn/datasets/_twenty_newsgroups.py
+++ b/sklearn/datasets/_twenty_newsgroups.py
@@ -24,30 +24,28 @@
 # Copyright (c) 2011 Olivier Grisel
 # License: BSD 3 clause

-import codecs
-import logging
 import os
+import logging
+import tarfile
 import pickle
-import re
 import shutil
-import tarfile
+import re
+import codecs

 import numpy as np
 import scipy.sparse as sp
-
 import joblib

-from .. import preprocessing
+from . import get_data_home
+from . import load_files
+from ._base import _convert_data_dataframe
+from ._base import _pkl_filepath
+from ._base import _fetch_remote
+from ._base import RemoteFileMetadata
+from ._base import load_descr
 from ..feature_extraction.text import CountVectorizer
-from ..utils import Bunch, check_random_state
-from . import get_data_home, load_files
-from ._base import (
-    RemoteFileMetadata,
-    _convert_data_dataframe,
-    _fetch_remote,
-    _pkl_filepath,
-    load_descr,
-)
+from .. import preprocessing
+from ..utils import check_random_state, Bunch

 logger = logging.getLogger(__name__)
diff --git a/sklearn/datasets/setup.py b/sklearn/datasets/setup.py
index 8d65328e69048..a75f14a083297 100644
--- a/sklearn/datasets/setup.py
+++ b/sklearn/datasets/setup.py
@@ -1,8 +1,7 @@
+import numpy
 import os
 import platform

-import numpy
-

 def configuration(parent_package="", top_path=None):
     from numpy.distutils.misc_util import Configuration
diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py
index c8ab1cd04ee6e..ef1280f6218b1 100644
--- a/sklearn/datasets/tests/conftest.py
+++ b/sklearn/datasets/tests/conftest.py
@@ -1,7 +1,6 @@
 """ Network tests are only run, if data is already locally available, or if
 download is specifically requested by environment variable."""
 import builtins
-
 import pytest
diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py
index d1d03fdca7c9a..4244dd7865945 100644
--- a/sklearn/datasets/tests/test_20news.py
+++ b/sklearn/datasets/tests/test_20news.py
@@ -4,17 +4,16 @@
 from functools import partial
 from unittest.mock import patch

-import numpy as np
 import pytest
+
+import numpy as np
 import scipy.sparse as sp

-from sklearn.datasets.tests.test_common import (
-    check_as_frame,
-    check_pandas_dependency_message,
-    check_return_X_y,
-)
-from sklearn.preprocessing import normalize
+from sklearn.datasets.tests.test_common import check_as_frame
+from sklearn.datasets.tests.test_common import check_pandas_dependency_message
+from sklearn.datasets.tests.test_common import check_return_X_y
 from sklearn.utils._testing import assert_allclose_dense_sparse
+from sklearn.preprocessing import normalize


 def test_20news(fetch_20newsgroups_fxt):
diff --git a/sklearn/datasets/tests/test_arff_parser.py b/sklearn/datasets/tests/test_arff_parser.py
index b92ade2e3617a..3a06a3c338394 100644
--- a/sklearn/datasets/tests/test_arff_parser.py
+++ b/sklearn/datasets/tests/test_arff_parser.py
@@ -1,6 +1,9 @@
 import pytest

-from sklearn.datasets._arff_parser import _post_process_frame, load_arff_from_gzip_file
+from sklearn.datasets._arff_parser import (
+    _post_process_frame,
+    load_arff_from_gzip_file,
+)


 @pytest.mark.parametrize(
diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
index 270b985e10f87..2eeb2fc570094 100644
--- a/sklearn/datasets/tests/test_base.py
+++ b/sklearn/datasets/tests/test_base.py
@@ -2,32 +2,33 @@
 import shutil
 import tempfile
 import warnings
+from pickle import loads
+from pickle import dumps
 from functools import partial
 from importlib import resources
-from pickle import dumps, loads

-import numpy as np
 import pytest
-
-from sklearn.datasets import (
-    clear_data_home,
-    get_data_home,
-    load_boston,
-    load_breast_cancer,
-    load_diabetes,
-    load_digits,
-    load_files,
-    load_iris,
-    load_linnerud,
-    load_sample_image,
-    load_sample_images,
-    load_wine,
+import numpy as np
+from sklearn.datasets import get_data_home
+from sklearn.datasets import clear_data_home
+from sklearn.datasets import load_files
+from sklearn.datasets import load_sample_images
+from sklearn.datasets import load_sample_image
+from sklearn.datasets import load_digits
+from sklearn.datasets import load_diabetes
+from sklearn.datasets import load_linnerud
+from sklearn.datasets import load_iris
+from sklearn.datasets import load_breast_cancer
+from sklearn.datasets import load_boston
+from sklearn.datasets import load_wine
+from sklearn.datasets._base import (
+    load_csv_data,
+    load_gzip_compressed_csv_data,
 )
-from sklearn.datasets._base import load_csv_data, load_gzip_compressed_csv_data
-from sklearn.datasets.tests.test_common import check_as_frame
 from sklearn.preprocessing import scale
 from sklearn.utils import Bunch
 from sklearn.utils._testing import SkipTest
+from sklearn.datasets.tests.test_common import check_as_frame


 def _remove_dir(path):
diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py
index 868d3d8084ecc..82a321e96a8d6 100644
--- a/sklearn/datasets/tests/test_california_housing.py
+++ b/sklearn/datasets/tests/test_california_housing.py
@@ -1,11 +1,10 @@
 """Test the california_housing loader, if the data is available,
 or if specifically requested via environment variable (e.g. for travis cron job)."""

-from functools import partial
-
 import pytest

 from sklearn.datasets.tests.test_common import check_return_X_y
+from functools import partial


 def test_fetch(fetch_california_housing_fxt):
diff --git a/sklearn/datasets/tests/test_common.py b/sklearn/datasets/tests/test_common.py
index 5f53d4e659e51..49155837be25b 100644
--- a/sklearn/datasets/tests/test_common.py
+++ b/sklearn/datasets/tests/test_common.py
@@ -2,8 +2,8 @@
 import inspect
 import os

-import numpy as np
 import pytest
+import numpy as np

 import sklearn.datasets
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index 72fe0fdd526f8..bbdd395a847f4 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -2,9 +2,7 @@
 or if specifically requested via environment variable (e.g. for travis cron job)."""
 from functools import partial
-
 import pytest
-
 from sklearn.datasets.tests.test_common import check_return_X_y
diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py
index 965e1d46231e9..b935da3a26add 100644
--- a/sklearn/datasets/tests/test_kddcup99.py
+++ b/sklearn/datasets/tests/test_kddcup99.py
@@ -7,14 +7,11 @@
 """

 from functools import partial
-
 import pytest

-from sklearn.datasets.tests.test_common import (
-    check_as_frame,
-    check_pandas_dependency_message,
-    check_return_X_y,
-)
+from sklearn.datasets.tests.test_common import check_as_frame
+from sklearn.datasets.tests.test_common import check_pandas_dependency_message
+from sklearn.datasets.tests.test_common import check_return_X_y


 @pytest.mark.parametrize("as_frame", [True, False])
diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py
index 49a8ed1d73ded..fba3949befb1a 100644
--- a/sklearn/datasets/tests/test_lfw.py
+++ b/sklearn/datasets/tests/test_lfw.py
@@ -8,18 +8,19 @@
 joblib, successive runs will be fast (less than 200ms).
 """

-import os
 import random
+import os
 import shutil
 import tempfile
-from functools import partial
-
 import numpy as np
 import pytest
+from functools import partial
+from sklearn.datasets import fetch_lfw_pairs
+from sklearn.datasets import fetch_lfw_people

-from sklearn.datasets import fetch_lfw_pairs, fetch_lfw_people
-from sklearn.datasets.tests.test_common import check_return_X_y
 from sklearn.utils._testing import assert_array_equal
+from sklearn.datasets.tests.test_common import check_return_X_y
+

 SCIKIT_LEARN_DATA = None
 SCIKIT_LEARN_EMPTY_DATA = None
diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py
index 99b017bbac309..7d11516b0426c 100644
--- a/sklearn/datasets/tests/test_olivetti_faces.py
+++ b/sklearn/datasets/tests/test_olivetti_faces.py
@@ -4,8 +4,9 @@

 import numpy as np

-from sklearn.datasets.tests.test_common import check_return_X_y
 from sklearn.utils import Bunch
+from sklearn.datasets.tests.test_common import check_return_X_y
+
 from sklearn.utils._testing import assert_array_equal
diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py
index 3d0befcc9ca4c..826a07783a6b0 100644
--- a/sklearn/datasets/tests/test_openml.py
+++ b/sklearn/datasets/tests/test_openml.py
@@ -9,18 +9,11 @@
 from urllib.error import HTTPError

 import numpy as np
-import pytest
 import scipy.sparse
+import pytest

 import sklearn
 from sklearn import config_context
-from sklearn.datasets import fetch_openml as fetch_openml_orig
-from sklearn.datasets._openml import (
-    _OPENML_PREFIX,
-    _get_local_path,
-    _open_openml_url,
-    _retry_with_clean_cache,
-)
 from sklearn.utils import Bunch, check_pandas_support
 from sklearn.utils._testing import (
     SkipTest,
@@ -29,6 +22,15 @@
     fails_if_pypy,
 )

+from sklearn.datasets import fetch_openml as fetch_openml_orig
+from sklearn.datasets._openml import (
+    _OPENML_PREFIX,
+    _open_openml_url,
+    _get_local_path,
+    _retry_with_clean_cache,
+)
+
+
 OPENML_TEST_DATA_MODULE = "sklearn.datasets.tests.data.openml"
 # if True, urlopen will be monkey patched to only use local files
 test_offline = True
diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py
index a3718a8b76bb0..cdc9f02c010c5 100644
--- a/sklearn/datasets/tests/test_rcv1.py
+++ b/sklearn/datasets/tests/test_rcv1.py
@@ -2,13 +2,12 @@
 or if specifically requested via environment variable (e.g.
 for travis cron job)."""

-from functools import partial
-
-import numpy as np
 import scipy.sparse as sp
-
+import numpy as np
+from functools import partial
 from sklearn.datasets.tests.test_common import check_return_X_y
-from sklearn.utils._testing import assert_almost_equal, assert_array_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal


 def test_fetch_rcv1(fetch_rcv1_fxt):
diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py
index 4eeca65d7a6d9..b464178906f04 100644
--- a/sklearn/datasets/tests/test_samples_generator.py
+++ b/sklearn/datasets/tests/test_samples_generator.py
@@ -6,32 +6,30 @@
 import pytest
 import scipy.sparse as sp

-from sklearn.datasets import (
-    make_biclusters,
-    make_blobs,
-    make_checkerboard,
-    make_circles,
-    make_classification,
-    make_friedman1,
-    make_friedman2,
-    make_friedman3,
-    make_hastie_10_2,
-    make_low_rank_matrix,
-    make_moons,
-    make_multilabel_classification,
-    make_regression,
-    make_s_curve,
-    make_sparse_coded_signal,
-    make_sparse_uncorrelated,
-    make_spd_matrix,
-    make_swiss_roll,
-)
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose
+
+from sklearn.datasets import make_classification
+from sklearn.datasets import make_multilabel_classification
+from sklearn.datasets import make_hastie_10_2
+from sklearn.datasets import make_regression
+from sklearn.datasets import make_blobs
+from sklearn.datasets import make_friedman1
+from sklearn.datasets import make_friedman2
+from sklearn.datasets import make_friedman3
+from sklearn.datasets import make_low_rank_matrix
+from sklearn.datasets import make_moons
+from sklearn.datasets import make_circles
+from sklearn.datasets import make_sparse_coded_signal
+from sklearn.datasets import make_sparse_uncorrelated
+from sklearn.datasets import make_spd_matrix
+from sklearn.datasets import make_swiss_roll
+from sklearn.datasets import make_s_curve
+from sklearn.datasets import make_biclusters
+from sklearn.datasets import make_checkerboard
+
 from sklearn.utils.validation import assert_all_finite
diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py
index a131972bb71dc..892b6d0d43ba6 100644
--- a/sklearn/datasets/tests/test_svmlight_format.py
+++ b/sklearn/datasets/tests/test_svmlight_format.py
@@ -1,22 +1,22 @@
+from bz2 import BZ2File
 import gzip
+from io import BytesIO
+import numpy as np
+import scipy.sparse as sp
 import os
 import shutil
-from bz2 import BZ2File
 from importlib import resources
-from io import BytesIO
 from tempfile import NamedTemporaryFile

-import numpy as np
 import pytest
-import scipy.sparse as sp
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import fails_if_pypy

 import sklearn
-from sklearn.datasets import dump_svmlight_file, load_svmlight_file, load_svmlight_files
-from sklearn.utils._testing import (
-    assert_array_almost_equal,
-    assert_array_equal,
-    fails_if_pypy,
-)
+from sklearn.datasets import load_svmlight_file, load_svmlight_files, dump_svmlight_file
+

 TEST_DATA_MODULE = "sklearn.datasets.tests.data"
 datafile = "svmlight_classification.txt"
diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py
index 88851a0b3e5a4..c5f323d3c5d72 100644
--- a/sklearn/decomposition/__init__.py
+++ b/sklearn/decomposition/__init__.py
@@ -5,24 +5,29 @@

 """

-from ..utils.extmath import randomized_svd
+from ._nmf import (
+    NMF,
+    MiniBatchNMF,
+    non_negative_factorization,
+)
+from ._pca import PCA
+from ._incremental_pca import IncrementalPCA
+from ._kernel_pca import KernelPCA
+from ._sparse_pca import SparsePCA, MiniBatchSparsePCA
+from ._truncated_svd import TruncatedSVD
+from ._fastica import FastICA, fastica
 from ._dict_learning import (
-    DictionaryLearning,
-    MiniBatchDictionaryLearning,
-    SparseCoder,
     dict_learning,
     dict_learning_online,
     sparse_encode,
+    DictionaryLearning,
+    MiniBatchDictionaryLearning,
+    SparseCoder,
 )
 from ._factor_analysis import FactorAnalysis
-from ._fastica import FastICA, fastica
-from ._incremental_pca import IncrementalPCA
-from ._kernel_pca import KernelPCA
+from ..utils.extmath import randomized_svd
 from ._lda import LatentDirichletAllocation
-from ._nmf import NMF, MiniBatchNMF, non_negative_factorization
-from ._pca import PCA
-from ._sparse_pca import MiniBatchSparsePCA, SparsePCA
-from ._truncated_svd import TruncatedSVD
+

 __all__ = [
     "DictionaryLearning",
diff --git a/sklearn/decomposition/_base.py b/sklearn/decomposition/_base.py
index dd273af67ec63..888fc3856d1b8 100644
--- a/sklearn/decomposition/_base.py
+++ b/sklearn/decomposition/_base.py
@@ -8,13 +8,12 @@
 #
 # License: BSD 3 clause

-from abc import ABCMeta, abstractmethod
-
 import numpy as np
 from scipy import linalg

 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
 from ..utils.validation import check_is_fitted
+from abc import ABCMeta, abstractmethod


 class _BasePCA(
diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py
index 4a25838eb9c78..ad3db76bfd4b5 100644
--- a/sklearn/decomposition/_dict_learning.py
+++ b/sklearn/decomposition/_dict_learning.py
@@ -3,29 +3,25 @@
 # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort
 # License: BSD 3 clause

-import itertools
-import sys
 import time
+import sys
+import itertools
 import warnings
+
 from math import ceil

 import numpy as np
 from scipy import linalg
-
 from joblib import Parallel, effective_n_jobs

 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
-from ..linear_model import Lars, Lasso, LassoLars, orthogonal_mp_gram
-from ..utils import (
-    check_array,
-    check_random_state,
-    deprecated,
-    gen_batches,
-    gen_even_slices,
-)
+from ..utils import check_array, check_random_state, gen_even_slices, gen_batches
+from ..utils import deprecated
 from ..utils.extmath import randomized_svd, row_norms, svd_flip
+from ..utils.validation import check_is_fitted
+from ..utils.validation import check_scalar
 from ..utils.fixes import delayed
-from ..utils.validation import check_is_fitted, check_scalar
+from ..linear_model import Lasso, orthogonal_mp_gram, LassoLars, Lars


 def _check_positive_coding(method, positive):
diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py
index 473d22446c8d9..4b8eab3492ca8 100644
--- a/sklearn/decomposition/_factor_analysis.py
+++ b/sklearn/decomposition/_factor_analysis.py
@@ -20,16 +20,16 @@
 # License: BSD3

 import warnings
-from math import log, sqrt
-
+from math import sqrt, log
 import numpy as np
 from scipy import linalg
+

 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
-from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
 from ..utils.extmath import fast_logdet, randomized_svd, squared_norm
 from ..utils.validation import check_is_fitted
+from ..exceptions import ConvergenceWarning


 class FactorAnalysis(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py
index 96da62d7a0606..490a3323344d1 100644
--- a/sklearn/decomposition/_fastica.py
+++ b/sklearn/decomposition/_fastica.py
@@ -16,7 +16,8 @@
 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
 from ..exceptions import ConvergenceWarning
-from ..utils import as_float_array, check_array, check_random_state
+
+from ..utils import check_array, as_float_array, check_random_state
 from ..utils.validation import check_is_fitted

 __all__ = ["fastica", "FastICA"]
diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py
index ad36088f8a3ae..589796a7c97f7 100644
--- a/sklearn/decomposition/_incremental_pca.py
+++ b/sklearn/decomposition/_incremental_pca.py
@@ -7,9 +7,9 @@
 import numpy as np
 from scipy import linalg, sparse

-from ..utils import gen_batches
-from ..utils.extmath import _incremental_mean_and_var, svd_flip
 from ._base import _BasePCA
+from ..utils import gen_batches
+from ..utils.extmath import svd_flip, _incremental_mean_and_var


 class IncrementalPCA(_BasePCA):
diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py
index dd3dd4c6ea18a..4e3ad720ae126 100644
--- a/sklearn/decomposition/_kernel_pca.py
+++ b/sklearn/decomposition/_kernel_pca.py
@@ -4,20 +4,23 @@
 # Sylvain Marie
 # License: BSD 3 clause

-import numbers
-
 import numpy as np
+import numbers
 from scipy import linalg
 from scipy.sparse.linalg import eigsh

-from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
-from ..exceptions import NotFittedError
-from ..metrics.pairwise import pairwise_kernels
-from ..preprocessing import KernelCenterer
 from ..utils._arpack import _init_arpack_v0
+from ..utils.extmath import svd_flip, _randomized_eigsh
+from ..utils.validation import (
+    check_is_fitted,
+    _check_psd_eigenvalues,
+    check_scalar,
+)
 from ..utils.deprecation import deprecated
-from ..utils.extmath import _randomized_eigsh, svd_flip
-from ..utils.validation import _check_psd_eigenvalues, check_is_fitted, check_scalar
+from ..exceptions import NotFittedError
+from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
+from ..preprocessing import KernelCenterer
+from ..metrics.pairwise import pairwise_kernels


 class KernelPCA(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py
index 8c3dd70f8c3bc..6db9d900566eb 100644
--- a/sklearn/decomposition/_lda.py
+++ b/sklearn/decomposition/_lda.py
@@ -14,17 +14,18 @@
 import numpy as np
 import scipy.sparse as sp
 from scipy.special import gammaln, logsumexp
-
 from joblib import Parallel, effective_n_jobs

 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
 from ..utils import check_random_state, gen_batches, gen_even_slices
+from ..utils.validation import check_non_negative
+from ..utils.validation import check_is_fitted
 from ..utils.fixes import delayed
-from ..utils.validation import check_is_fitted, check_non_negative
+
 from ._online_lda_fast import (
+    mean_change,
     _dirichlet_expectation_1d,
     _dirichlet_expectation_2d,
-    mean_change,
 )

 EPS = np.finfo(float).eps
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 81a3a088cf791..7623822ba5912 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -6,23 +6,25 @@
 # Tom Dupre la Tour
 # License: BSD 3 clause

-import itertools
 import numbers
+import numpy as np
+import scipy.sparse as sp
 import time
+import itertools
 import warnings
 from math import sqrt
-
-import numpy as np
-import scipy.sparse as sp
 from scipy import linalg

+from ._cdnmf_fast import _update_cdnmf_fast
 from .._config import config_context
 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
 from ..exceptions import ConvergenceWarning
-from ..utils import check_array, check_random_state, gen_batches
+from ..utils import check_random_state, check_array, gen_batches
 from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
-from ..utils.validation import check_is_fitted, check_non_negative
-from ._cdnmf_fast import _update_cdnmf_fast
+from ..utils.validation import (
+    check_is_fitted,
+    check_non_negative,
+)

 EPSILON = np.finfo(np.float32).eps
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index e4367640667ca..635e119ae445d 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -10,20 +10,21 @@
 #
 # License: BSD 3 clause

-import numbers
 from math import log, sqrt
+import numbers

 import numpy as np
 from scipy import linalg
+from scipy.special import gammaln
 from scipy.sparse import issparse
 from scipy.sparse.linalg import svds
-from scipy.special import gammaln

+from ._base import _BasePCA
 from ..utils import check_random_state, check_scalar
 from ..utils._arpack import _init_arpack_v0
-from ..utils.extmath import fast_logdet, randomized_svd, stable_cumsum, svd_flip
+from ..utils.extmath import fast_logdet, randomized_svd, svd_flip
+from ..utils.extmath import stable_cumsum
 from ..utils.validation import check_is_fitted
-from ._base import _BasePCA


 def _assess_dimension(spectrum, rank, n_samples):
diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py
index a8aa44ea488fe..a36bfbfd529d0 100644
--- a/sklearn/decomposition/_sparse_pca.py
+++ b/sklearn/decomposition/_sparse_pca.py
@@ -6,10 +6,10 @@
 import numpy as np

-from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
-from ..linear_model import ridge_regression
 from ..utils import check_random_state
 from ..utils.validation import check_is_fitted
+from ..linear_model import ridge_regression
+from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
 from ._dict_learning import dict_learning, dict_learning_online
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index 367ab9058b654..b8417543783d4 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -7,7 +7,6 @@
 # License: 3-clause BSD.

 from numbers import Integral
-
 import numpy as np
 import scipy.sparse as sp
 from scipy.sparse.linalg import svds
diff --git a/sklearn/decomposition/setup.py b/sklearn/decomposition/setup.py
index c32eeadffe9e1..2937f282b755d 100644
--- a/sklearn/decomposition/setup.py
+++ b/sklearn/decomposition/setup.py
@@ -1,5 +1,4 @@
 import os
-
 import numpy
 from numpy.distutils.misc_util import Configuration
diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
index 0910edde825e1..b53148cddec57 100644
--- a/sklearn/decomposition/tests/test_dict_learning.py
+++ b/sklearn/decomposition/tests/test_dict_learning.py
@@ -1,34 +1,34 @@
-import itertools
+import pytest
 import warnings
-from functools import partial

 import numpy as np
-import pytest
+from functools import partial
+import itertools

 from sklearn.base import clone
-from sklearn.decomposition import (
-    DictionaryLearning,
-    MiniBatchDictionaryLearning,
-    SparseCoder,
-    dict_learning,
-    dict_learning_online,
-    sparse_encode,
-)
-from sklearn.decomposition._dict_learning import _update_dict
+
 from sklearn.exceptions import ConvergenceWarning
+
 from sklearn.utils import check_array
-from sklearn.utils._testing import (
-    TempMemmap,
-    assert_allclose,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
-from sklearn.utils.estimator_checks import (
-    check_transformer_data_not_an_array,
-    check_transformer_general,
-    check_transformers_unfitted,
-)
+
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils._testing import TempMemmap
+
+from sklearn.decomposition import DictionaryLearning
+from sklearn.decomposition import MiniBatchDictionaryLearning
+from sklearn.decomposition import SparseCoder
+from sklearn.decomposition import dict_learning
+from sklearn.decomposition import dict_learning_online
+from sklearn.decomposition import sparse_encode
+from sklearn.utils.estimator_checks import check_transformer_data_not_an_array
+from sklearn.utils.estimator_checks import check_transformer_general
+from sklearn.utils.estimator_checks import check_transformers_unfitted
+
+from sklearn.decomposition._dict_learning import _update_dict
+

 rng_global = np.random.RandomState(0)
 n_samples, n_features = 10, 8
@@ -401,8 +401,8 @@ def test_dict_learning_online_positivity(positive_code, positive_dict):
 def test_dict_learning_online_verbosity():
     # test verbosity for better coverage
     n_components = 5
-    import sys
     from io import StringIO
+    import sys

     old_stdout = sys.stdout
     try:
diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py
index 7ffac87f4f5a1..89ef433521e09 100644
--- a/sklearn/decomposition/tests/test_factor_analysis.py
+++ b/sklearn/decomposition/tests/test_factor_analysis.py
@@ -7,14 +7,12 @@
 import numpy as np
 import pytest

+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.decomposition import FactorAnalysis
+from sklearn.utils._testing import ignore_warnings
 from sklearn.decomposition._factor_analysis import _ortho_rotation
-from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    ignore_warnings,
-)

 # Ignore warnings from switching to more power iterations in randomized_svd
diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py
index f4d90514be96c..082b7d68dee79 100644
--- a/sklearn/decomposition/tests/test_fastica.py
+++ b/sklearn/decomposition/tests/test_fastica.py
@@ -2,16 +2,18 @@
 Test the fastica algorithm.
 """
 import itertools
+import pytest
 import warnings

 import numpy as np
-import pytest
 from scipy import stats

-from sklearn.decomposition import PCA, FastICA, fastica
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose
+
+from sklearn.decomposition import FastICA, fastica, PCA
 from sklearn.decomposition._fastica import _gs_decorrelation
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils._testing import assert_allclose, assert_array_equal


 def center_and_norm(x, axis=-1):
diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index 6db12280d21f1..f74d608c45b40 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -1,18 +1,17 @@
 """Tests for Incremental PCA."""
-import warnings
-
 import numpy as np
 import pytest
+import warnings
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose_dense_sparse
 from numpy.testing import assert_array_equal
-from scipy import sparse

 from sklearn import datasets
 from sklearn.decomposition import PCA, IncrementalPCA
-from sklearn.utils._testing import (
-    assert_allclose_dense_sparse,
-    assert_almost_equal,
-    assert_array_almost_equal,
-)
+
+from scipy import sparse

 iris = datasets.load_iris()
diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py
index 46127033c28dd..167ce84dc9082 100644
--- a/sklearn/decomposition/tests/test_kernel_pca.py
+++ b/sklearn/decomposition/tests/test_kernel_pca.py
@@ -1,22 +1,23 @@
-import warnings
-
 import numpy as np
-import pytest
 import scipy.sparse as sp
+import pytest
+import warnings
+
+from sklearn.utils._testing import (
+    assert_array_almost_equal,
+    assert_array_equal,
+    assert_allclose,
+)

-from sklearn.datasets import make_blobs, make_circles
 from sklearn.decomposition import PCA, KernelPCA
+from sklearn.datasets import make_circles
+from sklearn.datasets import make_blobs
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import Perceptron
-from sklearn.metrics.pairwise import rbf_kernel
-from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics.pairwise import rbf_kernel
 from sklearn.utils.validation import _check_psd_eigenvalues
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index 0453d56a7c135..9f3df5b64a803 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -3,24 +3,24 @@
 from io import StringIO

 import numpy as np
-import pytest
 import scipy.sparse as sp
+
 from scipy import linalg
+from sklearn.decomposition import NMF, MiniBatchNMF
+from sklearn.decomposition import non_negative_factorization
+from sklearn.decomposition import _nmf as nmf  # For testing internals
 from scipy.sparse import csc_matrix
+import pytest
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.extmath import squared_norm

 from sklearn.base import clone
-from sklearn.decomposition import NMF, MiniBatchNMF
-from sklearn.decomposition import _nmf as nmf  # For testing internals
-from sklearn.decomposition import non_negative_factorization
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
-from sklearn.utils.extmath import squared_norm


 @pytest.mark.parametrize(
diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py
index 10c6049099be7..e3ce951f7b6da 100644
--- a/sklearn/decomposition/tests/test_online_lda.py
+++ b/sklearn/decomposition/tests/test_online_lda.py
@@ -1,25 +1,26 @@
 import sys
-from io import StringIO

 import numpy as np
-import pytest
-from numpy.testing import assert_array_equal
 from scipy.linalg import block_diag
 from scipy.sparse import csr_matrix
 from scipy.special import psi
+from numpy.testing import assert_array_equal
+
+import pytest

 from sklearn.decomposition import LatentDirichletAllocation
 from sklearn.decomposition._lda import (
     _dirichlet_expectation_1d,
     _dirichlet_expectation_2d,
 )
+
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import if_safe_multiprocessing_with_blas
+
 from sklearn.exceptions import NotFittedError
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    if_safe_multiprocessing_with_blas,
-)
+from io import StringIO


 def _build_sparse_mtx():
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 406661005f197..a7202335041ce 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -1,15 +1,17 @@
-import warnings
-
 import numpy as np
-import pytest
 import scipy as sp
 from numpy.testing import assert_array_equal
+import pytest
+import warnings
+
+from sklearn.utils._testing import assert_allclose
+
 from sklearn import datasets
-from sklearn.datasets import load_iris
 from sklearn.decomposition import PCA
-from sklearn.decomposition._pca import _assess_dimension, _infer_dimension
-from sklearn.utils._testing import assert_allclose
+from sklearn.datasets import load_iris
+from sklearn.decomposition._pca import _assess_dimension
+from sklearn.decomposition._pca import _infer_dimension

 iris = datasets.load_iris()
 PCA_SOLVERS = ["full", "arpack", "randomized", "auto"]
diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py
index daff00be56687..db92ec582abdd 100644
--- a/sklearn/decomposition/tests/test_sparse_pca.py
+++ b/sklearn/decomposition/tests/test_sparse_pca.py
@@ -2,18 +2,17 @@
 # License: BSD 3 clause

 import sys
+import pytest

 import numpy as np
-import pytest
 from numpy.testing import assert_array_equal

-from sklearn.decomposition import PCA, MiniBatchSparsePCA, SparsePCA
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import if_safe_multiprocessing_with_blas
+
+from sklearn.decomposition import SparsePCA, MiniBatchSparsePCA, PCA
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_array_almost_equal,
-    if_safe_multiprocessing_with_blas,
-)


 def generate_toy_data(n_components, n_samples, image_size, random_state=None):
diff --git a/sklearn/decomposition/tests/test_truncated_svd.py b/sklearn/decomposition/tests/test_truncated_svd.py
index 4edb7d4a11109..bd0bde6e08aa7 100644
--- a/sklearn/decomposition/tests/test_truncated_svd.py
+++ b/sklearn/decomposition/tests/test_truncated_svd.py
@@ -1,12 +1,13 @@
 """Test truncated SVD transformer."""

 import numpy as np
-import pytest
 import scipy.sparse as sp

-from sklearn.decomposition import PCA, TruncatedSVD
+import pytest
+
+from sklearn.decomposition import TruncatedSVD, PCA
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import assert_allclose, assert_array_less
+from sklearn.utils._testing import assert_array_less, assert_allclose

 SVD_SOLVERS = ["arpack", "randomized"]
diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py
index a69d6195d124f..d06489fa8fd63 100644
--- a/sklearn/discriminant_analysis.py
+++ b/sklearn/discriminant_analysis.py
@@ -10,24 +10,21 @@
 # License: BSD 3-Clause

 import warnings
-from numbers import Real
-
 import numpy as np
 from scipy import linalg
 from scipy.special import expit
+from numbers import Real

-from .base import (
-    BaseEstimator,
-    ClassifierMixin,
-    TransformerMixin,
-    _ClassNamePrefixFeaturesOutMixin,
-)
-from .covariance import empirical_covariance, ledoit_wolf, shrunk_covariance
+from .base import BaseEstimator, TransformerMixin, ClassifierMixin
+from .base import _ClassNamePrefixFeaturesOutMixin
 from .linear_model._base import LinearClassifierMixin
-from .preprocessing import StandardScaler
-from .utils.extmath import softmax
-from .utils.multiclass import check_classification_targets, unique_labels
+from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance
+from .utils.multiclass import unique_labels
 from .utils.validation import check_is_fitted
+from .utils.multiclass import check_classification_targets
+from .utils.extmath import softmax
+from .preprocessing import StandardScaler
+

 __all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"]
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index a47ed21dcc444..7b31ee226664c 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -4,22 +4,20 @@
 # License: BSD 3 clause

 import warnings
-
 import numpy as np
 import scipy.sparse as sp

-from .base import BaseEstimator, ClassifierMixin, MultiOutputMixin, RegressorMixin
-from .utils import check_random_state, deprecated
-from .utils.multiclass import class_distribution
+from .base import BaseEstimator, ClassifierMixin, RegressorMixin
+from .base import MultiOutputMixin
+from .utils import check_random_state
+from .utils import deprecated
+from .utils.validation import _num_samples
+from .utils.validation import check_array
+from .utils.validation import check_consistent_length
+from .utils.validation import check_is_fitted, _check_sample_weight
 from .utils.random import _random_choice_csc
 from .utils.stats import _weighted_percentile
-from .utils.validation import (
-    _check_sample_weight,
-    _num_samples,
-    check_array,
-    check_consistent_length,
-    check_is_fitted,
-)
+from .utils.multiclass import class_distribution


 class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):
diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py
index f4a3756bdaf1d..e892d36a0ce46 100644
--- a/sklearn/ensemble/__init__.py
+++ b/sklearn/ensemble/__init__.py
@@ -2,24 +2,27 @@
 The :mod:`sklearn.ensemble` module includes ensemble-based methods for
 classification, regression and anomaly detection.
 """
-from ._bagging import BaggingClassifier, BaggingRegressor
 from ._base import BaseEnsemble
-from ._forest import (
-    ExtraTreesClassifier,
-    ExtraTreesRegressor,
-    RandomForestClassifier,
-    RandomForestRegressor,
-    RandomTreesEmbedding,
-)
-from ._gb import GradientBoostingClassifier, GradientBoostingRegressor
+from ._forest import RandomForestClassifier
+from ._forest import RandomForestRegressor
+from ._forest import RandomTreesEmbedding
+from ._forest import ExtraTreesClassifier
+from ._forest import ExtraTreesRegressor
+from ._bagging import BaggingClassifier
+from ._bagging import BaggingRegressor
+from ._iforest import IsolationForest
+from ._weight_boosting import AdaBoostClassifier
+from ._weight_boosting import AdaBoostRegressor
+from ._gb import GradientBoostingClassifier
+from ._gb import GradientBoostingRegressor
+from ._voting import VotingClassifier
+from ._voting import VotingRegressor
+from ._stacking import StackingClassifier
+from ._stacking import StackingRegressor
 from ._hist_gradient_boosting.gradient_boosting import (
-    HistGradientBoostingClassifier,
     HistGradientBoostingRegressor,
+    HistGradientBoostingClassifier,
 )
-from ._iforest import IsolationForest
-from ._stacking import StackingClassifier, StackingRegressor
-from ._voting import VotingClassifier, VotingRegressor
-from ._weight_boosting import AdaBoostClassifier, AdaBoostRegressor
+

 __all__ = [
     "BaseEnsemble",
diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
index 5052f940eb79a..9c8faa783c788 100644
--- a/sklearn/ensemble/_bagging.py
+++ b/sklearn/ensemble/_bagging.py
@@ -6,24 +6,25 @@
 import itertools
 import numbers
+import numpy as np
 from abc import ABCMeta, abstractmethod
-from functools import partial
 from warnings import warn
-
-import numpy as np
+from functools import partial

 from joblib import Parallel

+from ._base import BaseEnsemble, _partition_estimators
 from ..base import ClassifierMixin, RegressorMixin
-from ..metrics import accuracy_score, r2_score
+from ..metrics import r2_score, accuracy_score
 from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
-from ..utils import check_random_state, column_or_1d, deprecated, indices_to_mask
-from ..utils.fixes import delayed
+from ..utils import check_random_state, column_or_1d, deprecated
+from ..utils import indices_to_mask
 from ..utils.metaestimators import available_if
 from ..utils.multiclass import check_classification_targets
 from ..utils.random import sample_without_replacement
-from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter
-from ._base import BaseEnsemble, _partition_estimators
+from ..utils.validation import has_fit_parameter, check_is_fitted, _check_sample_weight
+from ..utils.fixes import delayed
+

 __all__ = ["BaggingClassifier", "BaggingRegressor"]
diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py
index e89b82c185648..6e97bef0678a2 100644
--- a/sklearn/ensemble/_base.py
+++ b/sklearn/ensemble/_base.py
@@ -3,22 +3,26 @@
 # Authors: Gilles Louppe
 # License: BSD 3 clause

-import numbers
 from abc import ABCMeta, abstractmethod
+import numbers
 from typing import List

 import numpy as np
 from joblib import effective_n_jobs

-from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier, is_regressor
+from ..base import clone
+from ..base import is_classifier, is_regressor
+from ..base import BaseEstimator
+from ..base import MetaEstimatorMixin
 from ..tree import (
-    BaseDecisionTree,
-    DecisionTreeClassifier,
     DecisionTreeRegressor,
     ExtraTreeRegressor,
+    BaseDecisionTree,
+    DecisionTreeClassifier,
 )
-from ..utils import Bunch, _print_elapsed_time, check_random_state
+from ..utils import Bunch, _print_elapsed_time
+from ..utils import check_random_state
 from ..utils.metaestimators import _BaseComposition
diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index b8b2787e8bc0a..919586001c58e 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -41,24 +41,18 @@ class calls the ``fit`` method of each sub-estimator on random samples

 import numbers
-import threading
-from abc import ABCMeta, abstractmethod
 from warnings import catch_warnings, simplefilter, warn
+import threading
+from abc import ABCMeta, abstractmethod

 import numpy as np
-from scipy.sparse import hstack as sparse_hstack
 from scipy.sparse import issparse
-
+from scipy.sparse import hstack as sparse_hstack
 from joblib import Parallel

-from ..base import (
-    ClassifierMixin,
-    MultiOutputMixin,
-    RegressorMixin,
-    TransformerMixin,
-    is_classifier,
-)
-from ..exceptions import DataConversionWarning
+from ..base import is_classifier
+from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin
+
 from ..metrics import accuracy_score, r2_score
 from ..preprocessing import OneHotEncoder
 from ..tree import (
@@ -67,17 +61,19 @@ class calls the ``fit`` method of each sub-estimator on random samples
     ExtraTreeClassifier,
     ExtraTreeRegressor,
 )
-from ..tree._tree import DOUBLE, DTYPE
+from ..tree._tree import DTYPE, DOUBLE
 from ..utils import check_random_state, compute_sample_weight, deprecated
+from ..exceptions import DataConversionWarning
+from ._base import BaseEnsemble, _partition_estimators
 from ..utils.fixes import delayed
 from ..utils.multiclass import check_classification_targets, type_of_target
 from ..utils.validation import (
-    _check_feature_names_in,
-    _check_sample_weight,
-    _num_samples,
     check_is_fitted,
+    _check_sample_weight,
+    _check_feature_names_in,
 )
-from ._base import BaseEnsemble, _partition_estimators
+from ..utils.validation import _num_samples
+

 __all__ = [
     "RandomForestClassifier",
diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 3ea2f7c450bcf..9b776a7feab10 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -20,31 +20,41 @@
 # Arnaud Joly, Jacob Schreiber
 # License: BSD 3 clause

-import numbers
+from abc import ABCMeta
+from abc import abstractmethod
 import warnings
-from abc import ABCMeta, abstractmethod
-from time import time
+from ._base import BaseEnsemble
+from ..base import ClassifierMixin
+from ..base import RegressorMixin
+from ..base import BaseEstimator
+from ..base import is_classifier
+from ..utils import deprecated
+
+from ._gradient_boosting import predict_stages
+from ._gradient_boosting import predict_stage
+from ._gradient_boosting import _random_sample_mask
+
+import numbers
 import numpy as np
-from scipy.sparse import csc_matrix, csr_matrix, issparse
-from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, is_classifier
-from ..exceptions import NotFittedError
+from scipy.sparse import csc_matrix
+from scipy.sparse import csr_matrix
+from scipy.sparse import issparse
+
+from time import time
 from ..model_selection import train_test_split
 from ..tree import DecisionTreeRegressor
-from ..tree._tree import DOUBLE, DTYPE
-from ..utils import (
-    check_array,
-    check_random_state,
-    check_scalar,
-    column_or_1d,
-    deprecated,
-)
-from ..utils.multiclass import check_classification_targets
-from ..utils.validation import _check_sample_weight, check_is_fitted
+from ..tree._tree import DTYPE, DOUBLE
 from . import _gb_losses
-from ._base import BaseEnsemble
-from ._gradient_boosting import _random_sample_mask, predict_stage, predict_stages
+
+from ..utils import check_random_state
+from ..utils import check_array
+from ..utils import check_scalar
+from ..utils import column_or_1d
+from ..utils.validation import check_is_fitted, _check_sample_weight
+from ..utils.multiclass import check_classification_targets
+from ..exceptions import NotFittedError


 class VerboseReporter:
diff --git a/sklearn/ensemble/_gb_losses.py b/sklearn/ensemble/_gb_losses.py
index b89552bc706e8..f6b5167d5128c 100644
--- a/sklearn/ensemble/_gb_losses.py
+++ b/sklearn/ensemble/_gb_losses.py
@@ -2,14 +2,16 @@
 decision trees.
 """

-from abc import ABCMeta, abstractmethod
+from abc import ABCMeta
+from abc import abstractmethod

 import numpy as np
 from scipy.special import expit, logsumexp

-from ..dummy import DummyClassifier, DummyRegressor
 from ..tree._tree import TREE_LEAF
 from ..utils.stats import _weighted_percentile
+from ..dummy import DummyClassifier
+from ..dummy import DummyRegressor


 class LossFunction(metaclass=ABCMeta):
diff --git a/sklearn/ensemble/_gradient_boosting.pyx b/sklearn/ensemble/_gradient_boosting.pyx
index 6baee8581b307..624d3e165502a 100644
--- a/sklearn/ensemble/_gradient_boosting.pyx
+++ b/sklearn/ensemble/_gradient_boosting.pyx
@@ -3,18 +3,22 @@
 # License: BSD 3 clause

 cimport cython
+
 from libc.stdlib cimport free
 from libc.string cimport memset

 import numpy as np
-
 cimport numpy as np
-
 np.import_array()

-from scipy.sparse import csr_matrix, issparse
+from scipy.sparse import issparse
+from scipy.sparse import csr_matrix

-from ..tree._tree cimport DTYPE_t, INT32_t, Node, SIZE_t, Tree
+from ..tree._tree cimport Node
+from ..tree._tree cimport Tree
+from ..tree._tree cimport DTYPE_t
+from ..tree._tree cimport SIZE_t
+from ..tree._tree cimport INT32_t
 from ..tree._utils cimport safe_realloc

 ctypedef np.int32_t int32
@@ -22,11 +26,10 @@
 ctypedef np.float64_t float64
 ctypedef np.uint8_t uint8

 # no namespace lookup for numpy dtype and array creation
-
+from numpy import zeros as np_zeros
+from numpy import ones as np_ones
 from numpy import float32 as np_float32
 from numpy import float64 as np_float64
-from numpy import ones as np_ones
-from numpy import zeros as np_zeros


 # constant to mark tree leafs
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
index 26085e4ac1013..3f0276b589bd9 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_binning.pyx
@@ -3,14 +3,11 @@
 cimport cython

 import numpy as np
-
 from numpy.math cimport INFINITY
-
 from cython.parallel import prange
-
 from libc.math cimport isnan

-from .common cimport X_BINNED_DTYPE_C, X_DTYPE_C
+from .common cimport X_DTYPE_C, X_BINNED_DTYPE_C


 def _map_to_bins(const X_DTYPE_C [:, :] data,
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd
index 488b66d300631..4aea8276c4398 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd
+++ b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pxd
@@ -1,5 +1,7 @@
-from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, X_BINNED_DTYPE_C, X_DTYPE_C
-
+from .common cimport X_BINNED_DTYPE_C
+from .common cimport BITSET_DTYPE_C
+from .common cimport BITSET_INNER_DTYPE_C
+from .common cimport X_DTYPE_C

 cdef void init_bitset(BITSET_DTYPE_C bitset) nogil
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx
index 249585aaf22f1..0d3b630f3314f 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_bitset.pyx
@@ -1,4 +1,8 @@
-from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, X_BINNED_DTYPE_C, X_DTYPE_C
+from .common cimport BITSET_INNER_DTYPE_C
+from .common cimport BITSET_DTYPE_C
+from .common cimport X_DTYPE_C
+from .common cimport X_BINNED_DTYPE_C
+

 # A bitset is a data structure used to represent sets of integers in [0, n]. We
 # use them to represent sets of features indices (e.g. features that go to the
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
index daf081bb926b4..bc27278ba9a1a 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_gradient_boosting.pyx
@@ -1,12 +1,10 @@
 # Author: Nicolas Hug

 cimport cython
-
-import numpy as np
 from cython.parallel import prange
+import numpy as np

 from .common import Y_DTYPE
-
 from .common cimport Y_DTYPE_C
diff --git a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
index bd3a9e8ed86a7..461747b3b1323 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx
@@ -1,21 +1,19 @@
 # Author: Nicolas Hug

 cimport cython
-
 from cython.parallel import prange
-
 from libc.math cimport isnan
-
 import numpy as np
-
 from numpy.math cimport INFINITY

-from .common cimport X_DTYPE_C, Y_DTYPE_C
-
+from .common cimport X_DTYPE_C
+from .common cimport Y_DTYPE_C
 from .common import Y_DTYPE
-
+from .common cimport X_BINNED_DTYPE_C
+from .common cimport BITSET_INNER_DTYPE_C
+from .common cimport BITSET_DTYPE_C
+from .common cimport node_struct
 from ._bitset cimport in_bitset_2d_memoryview
-from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, X_BINNED_DTYPE_C, node_struct


 def _predict_from_raw_data( # raw data = non-binned data
diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py
index e8fb46eb06265..a553a307d262b 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/binning.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py
@@ -9,14 +9,14 @@

 import numpy as np

+from ...utils import check_random_state, check_array
 from ...base import BaseEstimator, TransformerMixin
-from ...utils import check_array, check_random_state
-from ...utils._openmp_helpers import _openmp_effective_n_threads
-from ...utils.fixes import percentile
 from ...utils.validation import check_is_fitted
+from ...utils.fixes import percentile
+from ...utils._openmp_helpers import _openmp_effective_n_threads
 from ._binning import _map_to_bins
+from .common import X_DTYPE, X_BINNED_DTYPE, ALMOST_INF, X_BITSET_INNER_DTYPE
 from ._bitset import set_bitset_memoryview
-from .common import ALMOST_INF, X_BINNED_DTYPE, X_BITSET_INNER_DTYPE, X_DTYPE


 def _find_binning_thresholds(col_data, max_bins):
diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
index b767111fdb92d..e36f1beb86dd8 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py
@@ -1,40 +1,41 @@
 """Fast Gradient Boosting decision trees for classification and regression."""
 # Author: Nicolas Hug

-import warnings
 from abc import ABC, abstractmethod
 from functools import partial
-from timeit import default_timer as time
+import warnings

 import numpy as np
-
+from timeit import default_timer as time
 from ..._loss.loss import (
     _LOSSES,
-    AbsoluteError,
     BaseLoss,
+    AbsoluteError,
     HalfBinomialLoss,
     HalfMultinomialLoss,
     HalfPoissonLoss,
     HalfSquaredError,
     PinballLoss,
 )
-from ...base import BaseEstimator, ClassifierMixin, RegressorMixin, is_classifier
-from ...metrics import check_scoring
-from ...model_selection import train_test_split
-from ...preprocessing import LabelEncoder
+from ...base import BaseEstimator, RegressorMixin, ClassifierMixin, is_classifier
 from ...utils import check_random_state, resample
-from ...utils._openmp_helpers import _openmp_effective_n_threads
-from ...utils.multiclass import check_classification_targets
 from ...utils.validation import (
-    _check_sample_weight,
-    check_consistent_length,
     check_is_fitted,
+    check_consistent_length,
+    _check_sample_weight,
 )
+from ...utils._openmp_helpers import _openmp_effective_n_threads
+from ...utils.multiclass import check_classification_targets
+from ...metrics import check_scoring
+from ...model_selection import train_test_split
+from ...preprocessing import LabelEncoder

 from ._gradient_boosting import _update_raw_predictions
+from .common import Y_DTYPE, X_DTYPE, G_H_DTYPE
+
 from .binning import _BinMapper
-from .common import G_H_DTYPE, X_DTYPE, Y_DTYPE
 from .grower import TreeGrower
+

 _LOSSES = _LOSSES.copy()
 # TODO(1.2): Remove "least_squares" and "least_absolute_deviation"
 # TODO(1.3): Remove "binary_crossentropy" and "categorical_crossentropy"
diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py
index 4d2550ee19242..1733b5745f8a2 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/grower.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py
@@ -6,25 +6,22 @@
 """
 # Author: Nicolas Hug

-import numbers
-from heapq import heappop, heappush
-from timeit import default_timer as time
-
+from heapq import heappush, heappop
 import numpy as np
+from timeit import default_timer as time
+import numbers

-from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
-
-from ._bitset import set_raw_bitset_from_binned_bitset
-from .common import (
-    PREDICTOR_RECORD_DTYPE,
-    X_BITSET_INNER_DTYPE,
-    Y_DTYPE,
-    MonotonicConstraint,
-)
+from .splitting import Splitter
 from .histogram import HistogramBuilder
 from .predictor import TreePredictor
-from .splitting import Splitter
 from .utils import sum_parallel
+from .common import PREDICTOR_RECORD_DTYPE
+from .common import X_BITSET_INNER_DTYPE
+from .common import Y_DTYPE
+from .common import MonotonicConstraint
+from ._bitset import set_raw_bitset_from_binned_bitset
+from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
+

 EPS = np.finfo(Y_DTYPE).eps  # to avoid zero division errors
diff --git a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
index 83fda75c575b0..cd4b999dd0d26 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/histogram.pyx
@@ -3,13 +3,15 @@
 # Author: Nicolas Hug

 cimport cython
+from cython.parallel import prange

 import numpy as np
-from cython.parallel import prange

 from .common import HISTOGRAM_DTYPE
+from .common cimport hist_struct
+from .common cimport X_BINNED_DTYPE_C
+from .common cimport G_H_DTYPE_C

-from .common cimport G_H_DTYPE_C, X_BINNED_DTYPE_C, hist_struct

 # Notes:
 # - IN views are read-only, OUT views are write-only
@@ -178,7 +180,7 @@
             unsigned char hessians_are_constant = \
                 self.hessians_are_constant
             unsigned int bin_idx = 0
-
+
         for bin_idx in range(self.n_bins):
             histograms[feature_idx, bin_idx].sum_gradients = 0.
             histograms[feature_idx, bin_idx].sum_hessians = 0.
diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py
index 600e55e43467f..746fa34753121 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py
@@ -5,12 +5,10 @@

 import numpy as np

-from ._predictor import (
-    _compute_partial_dependence,
-    _predict_from_binned_data,
-    _predict_from_raw_data,
-)
 from .common import Y_DTYPE
+from ._predictor import _predict_from_raw_data
+from ._predictor import _predict_from_binned_data
+from ._predictor import _compute_partial_dependence


 class TreePredictor:
diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
index 4deedc5b02afb..06f6c9344d205 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
@@ -8,20 +8,22 @@
 # Author: Nicolas Hug

 cimport cython
-
-import numpy as np
 from cython.parallel import prange
-
-from libc.stdlib cimport free, malloc, qsort
+import numpy as np
+from libc.stdlib cimport malloc, free, qsort
 from libc.string cimport memcpy
 from numpy.math cimport INFINITY

-from .common cimport X_BINNED_DTYPE_C, Y_DTYPE_C, hist_struct
-
+from .common cimport X_BINNED_DTYPE_C
+from .common cimport Y_DTYPE_C
+from .common cimport hist_struct
 from .common import HISTOGRAM_DTYPE
-
-from ._bitset cimport in_bitset, init_bitset, set_bitset
-from .common cimport BITSET_DTYPE_C, BITSET_INNER_DTYPE_C, MonotonicConstraint
+from .common cimport BITSET_INNER_DTYPE_C
+from .common cimport BITSET_DTYPE_C
+from .common cimport MonotonicConstraint
+from ._bitset cimport init_bitset
+from ._bitset cimport set_bitset
+from ._bitset cimport in_bitset


 cdef struct split_info_struct:
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py
index 6c318ce22a2be..4581173fefe67 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py
@@ -1,17 +1,15 @@
 import numpy as np
+from numpy.testing import assert_array_equal, assert_allclose
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal

 from sklearn.ensemble._hist_gradient_boosting.binning import (
     _BinMapper,
     _find_binning_thresholds,
     _map_to_bins,
 )
-from sklearn.ensemble._hist_gradient_boosting.common import (
-    ALMOST_INF,
-    X_BINNED_DTYPE,
-    X_DTYPE,
-)
+from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

 n_threads = _openmp_effective_n_threads()
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py
index c02d66b666f80..e058781cefcef 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py
@@ -1,10 +1,10 @@
-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose

 from sklearn.ensemble._hist_gradient_boosting._bitset import (
-    in_bitset_memoryview,
     set_bitset_memoryview,
+    in_bitset_memoryview,
     set_raw_bitset_from_binned_bitset,
 )
 from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py
index ca82bf367f09a..f5c373ed84558 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py
@@ -1,15 +1,13 @@
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+from sklearn.datasets import make_classification, make_regression
 import numpy as np
 import pytest

-from sklearn.datasets import make_classification, make_regression
-from sklearn.ensemble import (
-    HistGradientBoostingClassifier,
-    HistGradientBoostingRegressor,
-)
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
 from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
-from sklearn.metrics import accuracy_score
-from sklearn.model_selection import train_test_split


 @pytest.mark.parametrize("seed", range(5))
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
index 1fc9b2f867da9..efa1ac1a4d762 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
@@ -3,32 +3,33 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose, assert_array_equal
-
 from sklearn._loss.loss import (
     AbsoluteError,
     HalfBinomialLoss,
     HalfSquaredError,
     PinballLoss,
 )
-from sklearn.base import BaseEstimator, TransformerMixin, clone, is_regressor
-from sklearn.compose import make_column_transformer
-from sklearn.datasets import make_classification, make_low_rank_matrix, make_regression
+from sklearn.datasets import make_classification, make_regression
+from sklearn.datasets import make_low_rank_matrix
+from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn.base import clone, BaseEstimator, TransformerMixin
+from sklearn.base import is_regressor
+from sklearn.pipeline import make_pipeline
+from sklearn.metrics import mean_poisson_deviance
 from sklearn.dummy import DummyRegressor
-from sklearn.ensemble import (
-    HistGradientBoostingClassifier,
-    HistGradientBoostingRegressor,
-)
+from sklearn.exceptions import NotFittedError
+from sklearn.compose import make_column_transformer
+
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
 from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
 from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE
-from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
-from sklearn.exceptions import NotFittedError
-from sklearn.metrics import mean_poisson_deviance
-from sklearn.model_selection import cross_val_score, train_test_split
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder
 from sklearn.utils import shuffle
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
+

 n_threads = _openmp_effective_n_threads()

 X_classification, y_classification = make_classification(random_state=0)
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py
index 4453afca157bc..2f1998c868f41 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py
@@ -1,18 +1,17 @@
 import numpy as np
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal
 from pytest import approx
+from numpy.testing import assert_array_equal
+from numpy.testing import assert_allclose

-from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
-from sklearn.ensemble._hist_gradient_boosting.common import (
-    G_H_DTYPE,
-    X_BINNED_DTYPE,
-    X_BITSET_INNER_DTYPE,
-    X_DTYPE,
-    Y_DTYPE,
-)
-from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
 from sklearn.preprocessing import OneHotEncoder
+from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
+from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
+from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

 n_threads = _openmp_effective_n_threads()
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py
index 99f74b0f542ee..1d5963d20739b 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py
@@ -1,20 +1,20 @@
 import numpy as np
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal

-from sklearn.ensemble._hist_gradient_boosting.common import (
-    G_H_DTYPE,
-    HISTOGRAM_DTYPE,
-    X_BINNED_DTYPE,
-)
+from numpy.testing import assert_allclose
+from numpy.testing import assert_array_equal
+
 from sklearn.ensemble._hist_gradient_boosting.histogram import (
-    _build_histogram,
     _build_histogram_naive,
+    _build_histogram,
     _build_histogram_no_hessian,
-    _build_histogram_root,
     _build_histogram_root_no_hessian,
+    _build_histogram_root,
     _subtract_histograms,
 )
+from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE


 @pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram])
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py
index afceedee624e0..4ab65c55a8620 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py
@@ -1,21 +1,17 @@
 import numpy as np
 import pytest

-from sklearn.ensemble import (
-    HistGradientBoostingClassifier,
-    HistGradientBoostingRegressor,
-)
-from sklearn.ensemble._hist_gradient_boosting.common import (
-    G_H_DTYPE,
-    X_BINNED_DTYPE,
-    MonotonicConstraint,
-)
 from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
-from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder
+from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint
 from sklearn.ensemble._hist_gradient_boosting.splitting import (
     Splitter,
     compute_node_value,
 )
+from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

 n_threads = _openmp_effective_n_threads()
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py
index 3c3c9ae81bac2..856ab180459d2 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py
@@ -1,25 +1,25 @@
 import numpy as np
-import pytest
 from numpy.testing import assert_allclose
-
 from sklearn.datasets import make_regression
-from sklearn.ensemble._hist_gradient_boosting._bitset import (
-    set_bitset_memoryview,
-    set_raw_bitset_from_binned_bitset,
-)
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import r2_score
+import pytest
+
 from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
+from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
+from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor
 from sklearn.ensemble._hist_gradient_boosting.common import (
-    ALMOST_INF,
     G_H_DTYPE,
     PREDICTOR_RECORD_DTYPE,
+    ALMOST_INF,
     X_BINNED_DTYPE,
     X_BITSET_INNER_DTYPE,
     X_DTYPE,
 )
-from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
-from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor
-from sklearn.metrics import r2_score
-from sklearn.model_selection import train_test_split
+from sklearn.ensemble._hist_gradient_boosting._bitset import (
+    set_bitset_memoryview,
+    set_raw_bitset_from_binned_bitset,
+)
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

 n_threads = _openmp_effective_n_threads()
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py
index 24c18e468e457..0d19bdc6df72b 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py
@@ -2,19 +2,17 @@
 import pytest
 from numpy.testing import assert_array_equal

-from sklearn.ensemble._hist_gradient_boosting.common import (
-    G_H_DTYPE,
-    HISTOGRAM_DTYPE,
-    X_BINNED_DTYPE,
-    MonotonicConstraint,
-)
-from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder
+from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE
+from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint
 from sklearn.ensemble._hist_gradient_boosting.splitting import (
     Splitter,
     compute_node_value,
 )
-from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
+from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder
 from sklearn.utils._testing import skip_if_32bit
+from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

 n_threads = _openmp_effective_n_threads()
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py
index 03a2720b36127..f8d7533ec38bc 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py
+++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py
@@ -1,15 +1,17 @@
 import numpy as np
+from numpy.testing import assert_array_equal
+from numpy.testing import assert_allclose
+
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal

 from sklearn.base import clone
 from sklearn.datasets import make_classification, make_regression
-from sklearn.ensemble import (
-    HistGradientBoostingClassifier,
-    HistGradientBoostingRegressor,
-)
+
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.metrics import check_scoring
+

 X_classification, y_classification = make_classification(random_state=0)
 X_regression, y_regression = make_regression(random_state=0)
diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx
index ce8fb51653ed7..d2123ecc61510 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx
@@ -5,8 +5,8 @@
 from cython.parallel import prange

 from ...base import is_classifier
 from .binning import _BinMapper
-
-from .common cimport G_H_DTYPE_C, Y_DTYPE_C
+from .common cimport G_H_DTYPE_C
+from .common cimport Y_DTYPE_C


 def get_equivalent_estimator(estimator, lib='lightgbm', n_classes=None):
@@ -115,21 +115,24 @@
     }

     if lib == 'lightgbm':
-        from lightgbm import LGBMClassifier, LGBMRegressor
+        from lightgbm import LGBMRegressor
+        from lightgbm import LGBMClassifier
         if is_classifier(estimator):
             return LGBMClassifier(**lightgbm_params)
         else:
             return LGBMRegressor(**lightgbm_params)

     elif lib == 'xgboost':
-        from xgboost import XGBClassifier, XGBRegressor
+        from xgboost import XGBRegressor
+        from xgboost import XGBClassifier
         if is_classifier(estimator):
             return XGBClassifier(**xgboost_params)
         else:
             return XGBRegressor(**xgboost_params)

     else:
-        from catboost import CatBoostClassifier, CatBoostRegressor
+        from catboost import CatBoostRegressor
+        from catboost import CatBoostClassifier
         if is_classifier(estimator):
             return CatBoostClassifier(**catboost_params)
         else:
diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py
index c50200c119fd5..5d50fad4780a7 100644
--- a/sklearn/ensemble/_iforest.py
+++ b/sklearn/ensemble/_iforest.py
@@ -3,16 +3,21 @@
 # License: BSD 3 clause

 import numbers
-from warnings import warn
-
 import numpy as np
 from scipy.sparse import issparse
+from warnings import warn

-from ..base import OutlierMixin
 from ..tree import ExtraTreeRegressor
 from ..tree._tree import DTYPE as tree_dtype
-from ..utils import check_array, check_random_state, gen_batches, get_chunk_n_rows
-from ..utils.validation import _num_samples, check_is_fitted
+from ..utils import (
+    check_random_state,
+    check_array,
+    gen_batches,
+    get_chunk_n_rows,
+)
+from ..utils.validation import check_is_fitted, _num_samples
+from ..base import OutlierMixin
+
 from ._bagging import BaseBagging

 __all__ = ["IsolationForest"]
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index 9f8754e410653..01556ec72e641 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -7,34 +7,34 @@
 from copy import deepcopy

 import numpy as np
-import scipy.sparse as sparse
-
 from joblib import Parallel
+import scipy.sparse as sparse

-from ..base import (
-    ClassifierMixin,
-    RegressorMixin,
-    TransformerMixin,
-    clone,
-    is_classifier,
-    is_regressor,
-)
+from ..base import clone
+from ..base import ClassifierMixin, RegressorMixin, TransformerMixin
+from ..base import is_classifier, is_regressor
 from ..exceptions import NotFittedError
-from ..linear_model import LogisticRegression, RidgeCV
-from ..model_selection import check_cv, cross_val_predict
+from ..utils._estimator_html_repr import _VisualBlock
+
+from ._base import _fit_single_estimator
+from ._base import _BaseHeterogeneousEnsemble
+
+from ..linear_model import LogisticRegression
+from ..linear_model import RidgeCV
+
+from ..model_selection import cross_val_predict
+from ..model_selection import check_cv
+
 from ..preprocessing import LabelEncoder
+
 from ..utils import Bunch
-from ..utils._estimator_html_repr import _VisualBlock
-from ..utils.fixes import delayed
 from ..utils.metaestimators import available_if
 from ..utils.multiclass import check_classification_targets
-from ..utils.validation import (
-    _check_feature_names_in,
-    check_is_fitted,
-    check_scalar,
-    column_or_1d,
-)
-from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator
+from ..utils.validation import check_is_fitted
+from ..utils.validation import check_scalar
+from ..utils.validation import column_or_1d
+from ..utils.fixes import delayed
+from ..utils.validation import _check_feature_names_in


 def _estimator_has(attr):
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index b8102e645e8ce..89f6dcd9ba217 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -13,23 +13,30 @@
 #
 # License: BSD 3 clause

-import numbers
 from abc import abstractmethod
+import numbers

 import numpy as np

 from joblib import Parallel

-from ..base import ClassifierMixin, RegressorMixin, TransformerMixin, clone
-from ..exceptions import NotFittedError
+from ..base import ClassifierMixin
+from ..base import RegressorMixin
+from ..base import TransformerMixin
+from ..base import clone
+from ._base import _fit_single_estimator
+from ._base import _BaseHeterogeneousEnsemble
 from ..preprocessing import LabelEncoder
-from ..utils import Bunch, check_scalar
-from ..utils._estimator_html_repr import _VisualBlock
-from ..utils.fixes import delayed
+from ..utils import Bunch
+from ..utils import check_scalar
 from ..utils.metaestimators import available_if
+from ..utils.validation import check_is_fitted
+from ..utils.validation import _check_feature_names_in
 from ..utils.multiclass import check_classification_targets
-from ..utils.validation import _check_feature_names_in, check_is_fitted, column_or_1d
-from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator
+from ..utils.validation import column_or_1d
+from ..exceptions import NotFittedError
+from ..utils._estimator_html_repr import _VisualBlock
+from ..utils.fixes import delayed


 class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py
index 2fffbda19885e..902d960f5e06c 100644
--- a/sklearn/ensemble/_weight_boosting.py
+++ b/sklearn/ensemble/_weight_boosting.py
@@ -23,25 +23,28 @@
 #
 # License: BSD 3 clause

-import numbers
-import warnings
 from abc import ABCMeta, abstractmethod
+import numbers

 import numpy as np
+
+import warnings
+
 from scipy.special import xlogy

+from ._base import BaseEnsemble
 from ..base import ClassifierMixin, RegressorMixin, is_classifier, is_regressor
-from ..metrics import accuracy_score, r2_score
+
 from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
-from ..utils import _safe_indexing, check_random_state, check_scalar
-from ..utils.extmath import softmax, stable_cumsum
-from ..utils.validation import (
-    _check_sample_weight,
-    _num_samples,
-    check_is_fitted,
-    has_fit_parameter,
-)
-from ._base import BaseEnsemble
+from ..utils import check_random_state, _safe_indexing
+from ..utils import check_scalar
+from ..utils.extmath import softmax
+from ..utils.extmath import stable_cumsum
+from ..metrics import accuracy_score, r2_score
+from ..utils.validation import check_is_fitted
+from ..utils.validation import _check_sample_weight
+from ..utils.validation import has_fit_parameter
+from ..utils.validation import _num_samples

 __all__ = [
     "AdaBoostClassifier",
diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py
index 22d97cea99a2a..1f78e61ab24fa 100644
--- a/sklearn/ensemble/tests/test_bagging.py
+++ b/sklearn/ensemble/tests/test_bagging.py
@@ -4,28 +4,33 @@
 # Author: Gilles Louppe
 # License: BSD 3 clause

-from itertools import cycle, product
+from itertools import product

 import numpy as np
+import joblib
 import pytest
-from scipy.sparse import csc_matrix, csr_matrix

-import joblib
 from sklearn.base import BaseEstimator
-from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.dummy import DummyClassifier, DummyRegressor
+from sklearn.model_selection import GridSearchCV, ParameterGrid
 from sklearn.ensemble import BaggingClassifier, BaggingRegressor
-from sklearn.feature_selection import SelectKBest
-from sklearn.linear_model import LogisticRegression, Perceptron
-from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split
+from sklearn.linear_model import Perceptron, LogisticRegression
 from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import FunctionTransformer, scale
-from sklearn.random_projection import SparseRandomProjection
-from sklearn.svm import SVC, SVR
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from sklearn.svm import SVC, SVR
+from
sklearn.random_projection import SparseRandomProjection +from sklearn.pipeline import make_pipeline +from sklearn.feature_selection import SelectKBest +from sklearn.model_selection import train_test_split +from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal +from sklearn.preprocessing import FunctionTransformer, scale +from itertools import cycle + +from scipy.sparse import csc_matrix, csr_matrix rng = check_random_state(0) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 3f5c488d8eeac..46b638c179859 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -5,18 +5,17 @@ # Authors: Gilles Louppe # License: BSD 3 clause -from collections import OrderedDict - import numpy as np import pytest from sklearn.datasets import load_iris -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.ensemble import BaggingClassifier from sklearn.ensemble._base import _set_random_states -from sklearn.feature_selection import SelectFromModel from sklearn.linear_model import Perceptron +from collections import OrderedDict +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.pipeline import Pipeline +from sklearn.feature_selection import SelectFromModel def test_base(): diff --git a/sklearn/ensemble/tests/test_common.py b/sklearn/ensemble/tests/test_common.py index 051408b5ba2f0..6c438571eaf39 100644 --- a/sklearn/ensemble/tests/test_common.py +++ b/sklearn/ensemble/tests/test_common.py @@ -1,25 +1,21 @@ import numpy as np import pytest -from sklearn.base import ClassifierMixin, clone, is_classifier -from sklearn.datasets import ( - load_diabetes, - load_iris, - make_classification, - make_regression, -) -from sklearn.ensemble import ( - RandomForestClassifier, - RandomForestRegressor, - StackingClassifier, - StackingRegressor, - VotingClassifier, - VotingRegressor, -) +from sklearn.base import clone +from sklearn.base import ClassifierMixin +from sklearn.base import is_classifier + +from sklearn.datasets import make_classification +from sklearn.datasets import make_regression +from sklearn.datasets import load_iris, load_diabetes from sklearn.impute import SimpleImputer -from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.svm import LinearSVC, LinearSVR, SVC, SVR from sklearn.pipeline import make_pipeline -from sklearn.svm import SVC, SVR, LinearSVC, LinearSVR +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +from sklearn.ensemble import StackingClassifier, StackingRegressor +from sklearn.ensemble import VotingClassifier, VotingRegressor X, y = load_iris(return_X_y=True) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index dbd65f3554898..c235ec078f2e6 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -8,46 +8,54 @@ # Arnaud Joly # License: BSD 3 clause -import itertools -import math import pickle +import math from collections import defaultdict -from itertools import combinations, product -from typing import Any, Dict +import itertools +from itertools import combinations +from itertools import product +from typing import Dict, Any import numpy as np -import pytest -from numpy.testing import assert_allclose 
-from scipy.sparse import coo_matrix, csc_matrix, csr_matrix +from scipy.sparse import csr_matrix +from scipy.sparse import csc_matrix +from scipy.sparse import coo_matrix from scipy.special import comb +import pytest + import joblib -from sklearn import datasets -from sklearn.datasets import make_classification -from sklearn.decomposition import TruncatedSVD +from numpy.testing import assert_allclose + from sklearn.dummy import DummyRegressor -from sklearn.ensemble import ( - ExtraTreesClassifier, - ExtraTreesRegressor, - RandomForestClassifier, - RandomForestRegressor, - RandomTreesEmbedding, -) +from sklearn.metrics import mean_poisson_deviance +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import _convert_container +from sklearn.utils._testing import ignore_warnings +from sklearn.utils._testing import skip_if_no_parallel + from sklearn.exceptions import NotFittedError -from sklearn.metrics import mean_poisson_deviance, mean_squared_error -from sklearn.model_selection import GridSearchCV, train_test_split + +from sklearn import datasets +from sklearn.decomposition import TruncatedSVD +from sklearn.datasets import make_classification +from sklearn.ensemble import ExtraTreesClassifier +from sklearn.ensemble import ExtraTreesRegressor +from sklearn.ensemble import RandomForestClassifier +from sklearn.ensemble import RandomForestRegressor +from sklearn.ensemble import RandomTreesEmbedding +from sklearn.model_selection import train_test_split +from sklearn.model_selection import GridSearchCV from sklearn.svm import LinearSVC -from sklearn.tree._classes import SPARSE_SPLITTERS -from sklearn.utils._testing import ( - _convert_container, - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, - ignore_warnings, - skip_if_no_parallel, -) from sklearn.utils.validation import check_random_state +from sklearn.metrics import mean_squared_error + +from sklearn.tree._classes import SPARSE_SPLITTERS + + # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y = [-1, -1, -1, 1, 1, 1] diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 0adff603dc624..5a28bed077036 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -2,33 +2,38 @@ Testing for the gradient boosting module (sklearn.ensemble.gradient_boosting). 
""" import warnings - import numpy as np -import pytest from numpy.testing import assert_allclose -from scipy.sparse import coo_matrix, csc_matrix, csr_matrix + +from scipy.sparse import csr_matrix +from scipy.sparse import csc_matrix +from scipy.sparse import coo_matrix from scipy.special import expit +import pytest + from sklearn import datasets from sklearn.base import clone from sklearn.datasets import make_classification, make_regression -from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor +from sklearn.ensemble import GradientBoostingClassifier +from sklearn.ensemble import GradientBoostingRegressor from sklearn.ensemble._gradient_boosting import predict_stages -from sklearn.exceptions import DataConversionWarning, NotFittedError -from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import OneHotEncoder, scale +from sklearn.svm import LinearSVC from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OneHotEncoder, scale -from sklearn.svm import LinearSVC, NuSVR from sklearn.utils import check_random_state, tosequence from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn.utils._testing import ( - assert_array_almost_equal, - assert_array_equal, - skip_if_32bit, -) +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import skip_if_32bit +from sklearn.exceptions import DataConversionWarning +from sklearn.exceptions import NotFittedError +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.pipeline import make_pipeline +from sklearn.linear_model import LinearRegression +from sklearn.svm import NuSVR + GRADIENT_BOOSTING_ESTIMATORS = [GradientBoostingClassifier, GradientBoostingRegressor] @@ -769,9 +774,10 @@ def test_oob_multilcass_iris(): def test_verbose_output(): # Check verbose=1 does not cause error. - import sys from io import StringIO + import sys + old_stdout = sys.stdout sys.stdout = StringIO() clf = GradientBoostingClassifier( @@ -800,8 +806,8 @@ def test_verbose_output(): def test_more_verbose_output(): # Check verbose=2 does not cause error. - import sys from io import StringIO + import sys old_stdout = sys.stdout sys.stdout = StringIO() diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index 5b8884da05d0b..820b3b5697442 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -2,25 +2,22 @@ Testing for the gradient boosting loss functions and initial estimators. 
""" from itertools import product - import numpy as np -import pytest from numpy.testing import assert_allclose +import pytest from pytest import approx -from sklearn.ensemble._gb_losses import ( - LOSS_FUNCTIONS, - BinomialDeviance, - ExponentialLoss, - HuberLossFunction, - LeastAbsoluteError, - LeastSquaresError, - MultinomialDeviance, - QuantileLossFunction, - RegressionLossFunction, -) -from sklearn.metrics import mean_pinball_loss from sklearn.utils import check_random_state +from sklearn.metrics import mean_pinball_loss +from sklearn.ensemble._gb_losses import RegressionLossFunction +from sklearn.ensemble._gb_losses import LeastSquaresError +from sklearn.ensemble._gb_losses import LeastAbsoluteError +from sklearn.ensemble._gb_losses import HuberLossFunction +from sklearn.ensemble._gb_losses import QuantileLossFunction +from sklearn.ensemble._gb_losses import BinomialDeviance +from sklearn.ensemble._gb_losses import MultinomialDeviance +from sklearn.ensemble._gb_losses import ExponentialLoss +from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS def test_binomial_deviance(): diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index c8ff6d82546a1..76464ac518af1 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -6,25 +6,26 @@ # Alexandre Gramfort # License: BSD 3 clause +import pytest import warnings -from unittest.mock import Mock, patch import numpy as np -import pytest -from scipy.sparse import csc_matrix, csr_matrix -from sklearn.datasets import load_diabetes, load_iris, make_classification +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import ignore_warnings +from sklearn.utils._testing import assert_allclose + +from sklearn.model_selection import ParameterGrid from sklearn.ensemble import IsolationForest from sklearn.ensemble._iforest import _average_path_length -from sklearn.metrics import roc_auc_score -from sklearn.model_selection import ParameterGrid, train_test_split +from sklearn.model_selection import train_test_split +from sklearn.datasets import load_diabetes, load_iris, make_classification from sklearn.utils import check_random_state -from sklearn.utils._testing import ( - assert_allclose, - assert_array_almost_equal, - assert_array_equal, - ignore_warnings, -) +from sklearn.metrics import roc_auc_score + +from scipy.sparse import csc_matrix, csr_matrix +from unittest.mock import Mock, patch rng = check_random_state(0) diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index 8f9d34d188c8f..3fcc6f5dbefe8 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -3,39 +3,50 @@ # Authors: Guillaume Lemaitre # License: BSD 3 clause -from unittest.mock import Mock - -import numpy as np import pytest -import scipy.sparse as sparse +import numpy as np from numpy.testing import assert_array_equal +import scipy.sparse as sparse -from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone -from sklearn.datasets import ( - load_breast_cancer, - load_diabetes, - load_iris, - make_classification, - make_regression, -) -from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.ensemble import ( - RandomForestClassifier, - RandomForestRegressor, - StackingClassifier, - StackingRegressor, -) -from sklearn.exceptions import ConvergenceWarning, NotFittedError -from 
sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.model_selection import KFold, StratifiedKFold, train_test_split +from sklearn.base import BaseEstimator +from sklearn.base import ClassifierMixin +from sklearn.base import RegressorMixin +from sklearn.base import clone + +from sklearn.exceptions import ConvergenceWarning + +from sklearn.datasets import load_iris +from sklearn.datasets import load_diabetes +from sklearn.datasets import load_breast_cancer +from sklearn.datasets import make_regression +from sklearn.datasets import make_classification + +from sklearn.dummy import DummyClassifier +from sklearn.dummy import DummyRegressor +from sklearn.linear_model import LogisticRegression +from sklearn.linear_model import LinearRegression +from sklearn.svm import LinearSVC +from sklearn.svm import LinearSVR +from sklearn.svm import SVC +from sklearn.ensemble import RandomForestClassifier +from sklearn.ensemble import RandomForestRegressor from sklearn.preprocessing import scale -from sklearn.svm import SVC, LinearSVC, LinearSVR + +from sklearn.ensemble import StackingClassifier +from sklearn.ensemble import StackingRegressor + +from sklearn.model_selection import train_test_split +from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import KFold + from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import ( - assert_allclose, - assert_allclose_dense_sparse, - ignore_warnings, -) +from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_allclose_dense_sparse +from sklearn.utils._testing import ignore_warnings + +from sklearn.exceptions import NotFittedError + +from unittest.mock import Mock diabetes = load_diabetes() X_diabetes, y_diabetes = diabetes.data, diabetes.target diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index ee3ee8cb75f0e..b7d7533da09a1 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -1,33 +1,30 @@ """Testing for the VotingClassifier and VotingRegressor""" +import pytest import re - import numpy as np -import pytest +from sklearn.utils._testing import assert_almost_equal, assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.exceptions import NotFittedError +from sklearn.linear_model import LinearRegression +from sklearn.linear_model import LogisticRegression +from sklearn.naive_bayes import GaussianNB +from sklearn.ensemble import RandomForestClassifier +from sklearn.ensemble import RandomForestRegressor +from sklearn.ensemble import VotingClassifier, VotingRegressor +from sklearn.tree import DecisionTreeClassifier +from sklearn.tree import DecisionTreeRegressor +from sklearn.model_selection import GridSearchCV from sklearn import datasets -from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.model_selection import cross_val_score, train_test_split from sklearn.datasets import make_multilabel_classification -from sklearn.dummy import DummyRegressor -from sklearn.ensemble import ( - RandomForestClassifier, - RandomForestRegressor, - VotingClassifier, - VotingRegressor, -) -from sklearn.exceptions import NotFittedError -from sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split +from sklearn.svm import SVC from sklearn.multiclass import OneVsRestClassifier -from sklearn.naive_bayes import GaussianNB from 
sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.utils._testing import ( - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, -) +from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.dummy import DummyRegressor + # Load datasets iris = datasets.load_iris() diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 6384f2de46bac..0348641d39453 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -1,27 +1,33 @@ """Testing for the boost module (sklearn.ensemble.boost).""" -import re - import numpy as np import pytest -from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, dok_matrix, lil_matrix +import re -from sklearn import datasets -from sklearn.base import BaseEstimator, clone +from scipy.sparse import csc_matrix +from scipy.sparse import csr_matrix +from scipy.sparse import coo_matrix +from scipy.sparse import dok_matrix +from scipy.sparse import lil_matrix + +from sklearn.utils._testing import assert_array_equal, assert_array_less +from sklearn.utils._testing import assert_array_almost_equal + +from sklearn.base import BaseEstimator +from sklearn.base import clone from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor -from sklearn.ensemble._weight_boosting import _samme_proba from sklearn.linear_model import LinearRegression -from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.model_selection import train_test_split +from sklearn.model_selection import GridSearchCV +from sklearn.ensemble import AdaBoostClassifier +from sklearn.ensemble import AdaBoostRegressor +from sklearn.ensemble._weight_boosting import _samme_proba from sklearn.svm import SVC, SVR from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import shuffle from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn.utils._testing import ( - assert_array_almost_equal, - assert_array_equal, - assert_array_less, -) +from sklearn import datasets + # Common random state rng = np.random.RandomState(0) diff --git a/sklearn/experimental/enable_halving_search_cv.py b/sklearn/experimental/enable_halving_search_cv.py index dd399ef35b6f7..f6937b0d14c01 100644 --- a/sklearn/experimental/enable_halving_search_cv.py +++ b/sklearn/experimental/enable_halving_search_cv.py @@ -19,12 +19,13 @@ flake8 to ignore the import, which appears as unused. """ -from .. import model_selection from ..model_selection._search_successive_halving import ( - HalvingGridSearchCV, HalvingRandomSearchCV, + HalvingGridSearchCV, ) +from .. import model_selection + # use settattr to avoid mypy errors when monkeypatching setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV) setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV) diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index d287400c7999f..f0416ac013e96 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -12,6 +12,7 @@ import warnings + warnings.warn( "Since version 1.0, " "it is not needed to import enable_hist_gradient_boosting anymore. 
" diff --git a/sklearn/experimental/enable_iterative_imputer.py b/sklearn/experimental/enable_iterative_imputer.py index 0b906961ca184..9ef9f6a0dbdf0 100644 --- a/sklearn/experimental/enable_iterative_imputer.py +++ b/sklearn/experimental/enable_iterative_imputer.py @@ -12,8 +12,8 @@ >>> from sklearn.impute import IterativeImputer """ -from .. import impute from ..impute._iterative import IterativeImputer +from .. import impute # use settattr to avoid mypy errors when monkeypatching setattr(impute, "IterativeImputer", IterativeImputer) diff --git a/sklearn/externals/_arff.py b/sklearn/externals/_arff.py index 2d65e244b3e21..7c9d51d0702ff 100644 --- a/sklearn/externals/_arff.py +++ b/sklearn/externals/_arff.py @@ -24,7 +24,7 @@ # SOFTWARE. # ============================================================================= -""" +''' The liac-arff module implements functions to read and write ARFF files in Python. It was created in the Connectionist Artificial Intelligence Laboratory (LIAC), which takes place at the Federal University of Rio Grande do Sul @@ -140,34 +140,33 @@ - Fully compatible with Python 2.7+, Python 3.5+, pypy and pypy3; - Under `MIT License `_ -""" -__author__ = "Renato de Pontes Pereira, Matthias Feurer, Joel Nothman" -__author_email__ = ( - "renato.ppontes@gmail.com, " - "feurerm@informatik.uni-freiburg.de, " - "joel.nothman@gmail.com" -) -__version__ = "2.4.0" +''' +__author__ = 'Renato de Pontes Pereira, Matthias Feurer, Joel Nothman' +__author_email__ = ('renato.ppontes@gmail.com, ' + 'feurerm@informatik.uni-freiburg.de, ' + 'joel.nothman@gmail.com') +__version__ = '2.4.0' -import csv import re -from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union +import csv +from typing import TYPE_CHECKING +from typing import Optional, List, Dict, Any, Iterator, Union, Tuple # CONSTANTS =================================================================== -_SIMPLE_TYPES = ["NUMERIC", "REAL", "INTEGER", "STRING"] +_SIMPLE_TYPES = ['NUMERIC', 'REAL', 'INTEGER', 'STRING'] -_TK_DESCRIPTION = "%" -_TK_COMMENT = "%" -_TK_RELATION = "@RELATION" -_TK_ATTRIBUTE = "@ATTRIBUTE" -_TK_DATA = "@DATA" +_TK_DESCRIPTION = '%' +_TK_COMMENT = '%' +_TK_RELATION = '@RELATION' +_TK_ATTRIBUTE = '@ATTRIBUTE' +_TK_DATA = '@DATA' -_RE_RELATION = re.compile(r"^([^\{\}%,\s]*|\".*\"|\'.*\')$", re.UNICODE) -_RE_ATTRIBUTE = re.compile(r"^(\".*\"|\'.*\'|[^\{\}%,\s]*)\s+(.+)$", re.UNICODE) +_RE_RELATION = re.compile(r'^([^\{\}%,\s]*|\".*\"|\'.*\')$', re.UNICODE) +_RE_ATTRIBUTE = re.compile(r'^(\".*\"|\'.*\'|[^\{\}%,\s]*)\s+(.+)$', re.UNICODE) _RE_QUOTE_CHARS = re.compile(r'["\'\\\s%,\000-\031]', re.UNICODE) _RE_ESCAPE_CHARS = re.compile(r'(?=["\'\\%])|[\n\r\t\000-\031]') -_RE_SPARSE_LINE = re.compile(r"^\s*\{.*\}\s*$", re.UNICODE) -_RE_NONTRIVIAL_DATA = re.compile("[\"'{}\\s]", re.UNICODE) +_RE_SPARSE_LINE = re.compile(r'^\s*\{.*\}\s*$', re.UNICODE) +_RE_NONTRIVIAL_DATA = re.compile('["\'{}\\s]', re.UNICODE) ArffDenseDataType = Iterator[List] ArffSparseDataType = Tuple[List, ...] 
@@ -188,7 +187,7 @@ class ArffContainerType(TypedDict): def _build_re_values(): - quoted_re = r""" + quoted_re = r''' " # open quote followed by zero or more of: (?: (?= len(conversors): raise BadDataFormat(row) # XXX: int 0 is used for implicit values, not '0' - values = [ - values[i] if i in values else 0 for i in range(len(conversors)) - ] + values = [values[i] if i in values else 0 for i in + range(len(conversors))] else: if len(values) != len(conversors): raise BadDataFormat(row) @@ -498,17 +476,16 @@ def decode_rows(self, stream, conversors): @staticmethod def _decode_values(values, conversors): try: - values = [ - None if value is None else conversor(value) - for conversor, value in zip(conversors, values) - ] + values = [None if value is None else conversor(value) + for conversor, value + in zip(conversors, values)] except ValueError as exc: - if "float: " in str(exc): + if 'float: ' in str(exc): raise BadNumericalValue() return values def encode_data(self, data, attributes): - """(INTERNAL) Encodes a line of data. + '''(INTERNAL) Encodes a line of data. Data instances follow the csv format, i.e, attribute values are delimited by commas. After converted from csv. @@ -516,31 +493,30 @@ def encode_data(self, data, attributes): :param data: a list of values. :param attributes: a list of attributes. Used to check if data is valid. :return: a string with the encoded data line. - """ + ''' current_row = 0 for inst in data: if len(inst) != len(attributes): raise BadObject( - "Instance %d has %d attributes, expected %d" - % (current_row, len(inst), len(attributes)) + 'Instance %d has %d attributes, expected %d' % + (current_row, len(inst), len(attributes)) ) new_data = [] for value in inst: - if value is None or value == "" or value != value: - s = "?" + if value is None or value == '' or value != value: + s = '?' else: s = encode_string(str(value)) new_data.append(s) current_row += 1 - yield ",".join(new_data) + yield ','.join(new_data) class _DataListMixin: """Mixin to return a list from decode_rows instead of a generator""" - def decode_rows(self, stream, conversors): return list(super().decode_rows(stream, conversors)) @@ -560,12 +536,10 @@ def decode_rows(self, stream, conversors): continue row_cols, values = zip(*sorted(values.items())) try: - values = [ - value if value is None else conversors[key](value) - for key, value in zip(row_cols, values) - ] + values = [value if value is None else conversors[key](value) + for key, value in zip(row_cols, values)] except ValueError as exc: - if "float: " in str(exc): + if 'float: ' in str(exc): raise BadNumericalValue() raise except IndexError: @@ -589,30 +563,30 @@ def encode_data(self, data, attributes): # Check if the rows are sorted if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)): - raise ValueError("liac-arff can only output COO matrices with sorted rows.") + raise ValueError("liac-arff can only output COO matrices with " + "sorted rows.") for v, col, row in zip(data, col, row): if row > current_row: # Add empty rows if necessary while current_row < row: - yield " ".join(["{", ",".join(new_data), "}"]) + yield " ".join(["{", ','.join(new_data), "}"]) new_data = [] current_row += 1 if col >= num_attributes: raise BadObject( - "Instance %d has at least %d attributes, expected %d" - % (current_row, col + 1, num_attributes) + 'Instance %d has at least %d attributes, expected %d' % + (current_row, col + 1, num_attributes) ) - if v is None or v == "" or v != v: - s = "?" + if v is None or v == '' or v != v: + s = '?' 
else: s = encode_string(str(v)) new_data.append("%d %s" % (col, s)) - yield " ".join(["{", ",".join(new_data), "}"]) - + yield " ".join(["{", ','.join(new_data), "}"]) class LODGeneratorData: def decode_rows(self, stream, conversors): @@ -622,12 +596,10 @@ def decode_rows(self, stream, conversors): if not isinstance(values, dict): raise BadLayout() try: - yield { - key: None if value is None else conversors[key](value) - for key, value in values.items() - } + yield {key: None if value is None else conversors[key](value) + for key, value in values.items()} except ValueError as exc: - if "float: " in str(exc): + if 'float: ' in str(exc): raise BadNumericalValue() raise except IndexError: @@ -643,21 +615,20 @@ def encode_data(self, data, attributes): if len(row) > 0 and max(row) >= num_attributes: raise BadObject( - "Instance %d has %d attributes, expected %d" - % (current_row, max(row) + 1, num_attributes) + 'Instance %d has %d attributes, expected %d' % + (current_row, max(row) + 1, num_attributes) ) for col in sorted(row): v = row[col] - if v is None or v == "" or v != v: - s = "?" + if v is None or v == '' or v != v: + s = '?' else: s = encode_string(str(v)) new_data.append("%d %s" % (col, s)) current_row += 1 - yield " ".join(["{", ",".join(new_data), "}"]) - + yield " ".join(["{", ','.join(new_data), "}"]) class LODData(_DataListMixin, LODGeneratorData): pass @@ -677,33 +648,31 @@ def _get_data_object_for_decoding(matrix_type): else: raise ValueError("Matrix type %s not supported." % str(matrix_type)) - def _get_data_object_for_encoding(matrix): # Probably a scipy.sparse - if hasattr(matrix, "format"): - if matrix.format == "coo": + if hasattr(matrix, 'format'): + if matrix.format == 'coo': return COOData() else: - raise ValueError("Cannot guess matrix format!") + raise ValueError('Cannot guess matrix format!') elif isinstance(matrix[0], dict): return LODData() else: return Data() - # ============================================================================= # ADVANCED INTERFACE ========================================================== class ArffDecoder: - """An ARFF decoder.""" + '''An ARFF decoder.''' def __init__(self): - """Constructor.""" + '''Constructor.''' self._conversors = [] self._current_line = 0 def _decode_comment(self, s): - """(INTERNAL) Decodes a comment line. + '''(INTERNAL) Decodes a comment line. Comments are single line strings starting, obligatorily, with the ``%`` character, and can have any symbol, including whitespaces or special @@ -714,12 +683,12 @@ def _decode_comment(self, s): :param s: a normalized string. :return: a string with the decoded comment. - """ - res = re.sub(r"^\%( )?", "", s) + ''' + res = re.sub(r'^\%( )?', '', s) return res def _decode_relation(self, s): - """(INTERNAL) Decodes a relation line. + '''(INTERNAL) Decodes a relation line. The relation declaration is a line with the format ``@RELATION ``, where ``relation-name`` is a string. The string must @@ -731,18 +700,18 @@ def _decode_relation(self, s): :param s: a normalized string. :return: a string with the decoded relation name. - """ - _, v = s.split(" ", 1) + ''' + _, v = s.split(' ', 1) v = v.strip() if not _RE_RELATION.match(v): raise BadRelationFormat() - res = str(v.strip("\"'")) + res = str(v.strip('"\'')) return res def _decode_attribute(self, s): - """(INTERNAL) Decodes an attribute line. + '''(INTERNAL) Decodes an attribute line. The attribute is the most complex declaration in an arff file. 
All attributes must follow the template:: @@ -767,8 +736,8 @@ def _decode_attribute(self, s): :param s: a normalized string. :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES). - """ - _, v = s.split(" ", 1) + ''' + _, v = s.split(' ', 1) v = v.strip() # Verify the general structure of declaration @@ -780,12 +749,12 @@ def _decode_attribute(self, s): name, type_ = m.groups() # Extracts the final name - name = str(name.strip("\"'")) + name = str(name.strip('"\'')) # Extracts the final type if type_[:1] == "{" and type_[-1:] == "}": try: - type_ = _parse_values(type_.strip("{} ")) + type_ = _parse_values(type_.strip('{} ')) except Exception: raise BadAttributeType() if isinstance(type_, dict): @@ -794,27 +763,27 @@ def _decode_attribute(self, s): else: # If not nominal, verify the type name type_ = str(type_).upper() - if type_ not in ["NUMERIC", "REAL", "INTEGER", "STRING"]: + if type_ not in ['NUMERIC', 'REAL', 'INTEGER', 'STRING']: raise BadAttributeType() return (name, type_) def _decode(self, s, encode_nominal=False, matrix_type=DENSE): - """Do the job the ``encode``.""" + '''Do the job the ``encode``.''' # Make sure this method is idempotent self._current_line = 0 # If string, convert to a list of lines if isinstance(s, str): - s = s.strip("\r\n ").replace("\r\n", "\n").split("\n") + s = s.strip('\r\n ').replace('\r\n', '\n').split('\n') # Create the return object obj: ArffContainerType = { - "description": "", - "relation": "", - "attributes": [], - "data": [], + 'description': '', + 'relation': '', + 'attributes': [], + 'data': [] } attribute_names = {} @@ -827,15 +796,14 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): for row in s: self._current_line += 1 # Ignore empty lines - row = row.strip(" \r\n") - if not row: - continue + row = row.strip(' \r\n') + if not row: continue u_row = row.upper() # DESCRIPTION ----------------------------------------------------- if u_row.startswith(_TK_DESCRIPTION) and STATE == _TK_DESCRIPTION: - obj["description"] += self._decode_comment(row) + "\n" + obj['description'] += self._decode_comment(row) + '\n' # ----------------------------------------------------------------- # RELATION -------------------------------------------------------- @@ -844,7 +812,7 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): raise BadLayout() STATE = _TK_RELATION - obj["relation"] = self._decode_relation(row) + obj['relation'] = self._decode_relation(row) # ----------------------------------------------------------------- # ATTRIBUTE ------------------------------------------------------- @@ -859,7 +827,7 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): raise BadAttributeName(attr[0], attribute_names[attr[0]]) else: attribute_names[attr[0]] = self._current_line - obj["attributes"].append(attr) + obj['attributes'].append(attr) if isinstance(attr[1], (list, tuple)): if encode_nominal: @@ -867,12 +835,10 @@ def _decode(self, s, encode_nominal=False, matrix_type=DENSE): else: conversor = NominalConversor(attr[1]) else: - CONVERSOR_MAP = { - "STRING": str, - "INTEGER": lambda x: int(float(x)), - "NUMERIC": float, - "REAL": float, - } + CONVERSOR_MAP = {'STRING': str, + 'INTEGER': lambda x: int(float(x)), + 'NUMERIC': float, + 'REAL': float} conversor = CONVERSOR_MAP[attr[1]] self._conversors.append(conversor) @@ -903,14 +869,14 @@ def stream(): yield row # Alter the data object - obj["data"] = data.decode_rows(stream(), self._conversors) - if obj["description"].endswith("\n"): - obj["description"] = obj["description"][:-1] + 
obj['data'] = data.decode_rows(stream(), self._conversors) + if obj['description'].endswith('\n'): + obj['description'] = obj['description'][:-1] return obj def decode(self, s, encode_nominal=False, return_type=DENSE): - """Returns the Python representation of a given ARFF file. + '''Returns the Python representation of a given ARFF file. When a file object is passed as an argument, this method reads lines iteratively, avoiding to load unnecessary information to the memory. @@ -923,21 +889,20 @@ def decode(self, s, encode_nominal=False, return_type=DENSE): `arff.DENSE_GEN` or `arff.LOD_GEN`. Consult the sections on `working with sparse data`_ and `loading progressively`_. - """ + ''' try: - return self._decode( - s, encode_nominal=encode_nominal, matrix_type=return_type - ) + return self._decode(s, encode_nominal=encode_nominal, + matrix_type=return_type) except ArffException as e: e.line = self._current_line raise e class ArffEncoder: - """An ARFF encoder.""" + '''An ARFF encoder.''' - def _encode_comment(self, s=""): - """(INTERNAL) Encodes a comment line. + def _encode_comment(self, s=''): + '''(INTERNAL) Encodes a comment line. Comments are single line strings starting, obligatorily, with the ``%`` character, and can have any symbol, including whitespaces or special @@ -947,30 +912,30 @@ def _encode_comment(self, s=""): :param s: (OPTIONAL) string. :return: a string with the encoded comment line. - """ + ''' if s: - return "%s %s" % (_TK_COMMENT, s) + return '%s %s'%(_TK_COMMENT, s) else: - return "%s" % _TK_COMMENT + return '%s' % _TK_COMMENT def _encode_relation(self, name): - """(INTERNAL) Decodes a relation line. + '''(INTERNAL) Decodes a relation line. The relation declaration is a line with the format ``@RELATION ``, where ``relation-name`` is a string. :param name: a string. :return: a string with the encoded relation declaration. - """ - for char in " %{},": + ''' + for char in ' %{},': if char in name: - name = '"%s"' % name + name = '"%s"'%name break - return "%s %s" % (_TK_RELATION, name) + return '%s %s'%(_TK_RELATION, name) def _encode_attribute(self, name, type_): - """(INTERNAL) Encodes an attribute line. + '''(INTERNAL) Encodes an attribute line. The attribute follow the template:: @@ -991,99 +956,94 @@ def _encode_attribute(self, name, type_): :param name: a string. :param type_: a string or a list of string. :return: a string with the encoded attribute declaration. - """ - for char in " %{},": + ''' + for char in ' %{},': if char in name: - name = '"%s"' % name + name = '"%s"'%name break if isinstance(type_, (tuple, list)): - type_tmp = ["%s" % encode_string(type_k) for type_k in type_] - type_ = "{%s}" % ", ".join(type_tmp) + type_tmp = ['%s' % encode_string(type_k) for type_k in type_] + type_ = '{%s}'%(', '.join(type_tmp)) - return "%s %s %s" % (_TK_ATTRIBUTE, name, type_) + return '%s %s %s'%(_TK_ATTRIBUTE, name, type_) def encode(self, obj): - """Encodes a given object to an ARFF file. + '''Encodes a given object to an ARFF file. :param obj: the object containing the ARFF information. :return: the ARFF file as an string. - """ + ''' data = [row for row in self.iter_encode(obj)] - return "\n".join(data) + return '\n'.join(data) def iter_encode(self, obj): - """The iterative version of `arff.ArffEncoder.encode`. + '''The iterative version of `arff.ArffEncoder.encode`. This encodes iteratively a given object and return, one-by-one, the lines of the ARFF file. :param obj: the object containing the ARFF information. :return: (yields) the ARFF file as strings. 
- """ + ''' # DESCRIPTION - if obj.get("description", None): - for row in obj["description"].split("\n"): + if obj.get('description', None): + for row in obj['description'].split('\n'): yield self._encode_comment(row) # RELATION - if not obj.get("relation"): - raise BadObject("Relation name not found or with invalid value.") + if not obj.get('relation'): + raise BadObject('Relation name not found or with invalid value.') - yield self._encode_relation(obj["relation"]) - yield "" + yield self._encode_relation(obj['relation']) + yield '' # ATTRIBUTES - if not obj.get("attributes"): - raise BadObject("Attributes not found.") + if not obj.get('attributes'): + raise BadObject('Attributes not found.') attribute_names = set() - for attr in obj["attributes"]: + for attr in obj['attributes']: # Verify for bad object format - if ( - not isinstance(attr, (tuple, list)) - or len(attr) != 2 - or not isinstance(attr[0], str) - ): - raise BadObject('Invalid attribute declaration "%s"' % str(attr)) + if not isinstance(attr, (tuple, list)) or \ + len(attr) != 2 or \ + not isinstance(attr[0], str): + raise BadObject('Invalid attribute declaration "%s"'%str(attr)) if isinstance(attr[1], str): # Verify for invalid types if attr[1] not in _SIMPLE_TYPES: - raise BadObject('Invalid attribute type "%s"' % str(attr)) + raise BadObject('Invalid attribute type "%s"'%str(attr)) # Verify for bad object format elif not isinstance(attr[1], (tuple, list)): - raise BadObject('Invalid attribute type "%s"' % str(attr)) + raise BadObject('Invalid attribute type "%s"'%str(attr)) # Verify attribute name is not used twice if attr[0] in attribute_names: - raise BadObject( - 'Trying to use attribute name "%s" for the second time.' - % str(attr[0]) - ) + raise BadObject('Trying to use attribute name "%s" for the ' + 'second time.' % str(attr[0])) else: attribute_names.add(attr[0]) yield self._encode_attribute(attr[0], attr[1]) - yield "" - attributes = obj["attributes"] + yield '' + attributes = obj['attributes'] # DATA yield _TK_DATA - if "data" in obj: - data = _get_data_object_for_encoding(obj.get("data")) - yield from data.encode_data(obj.get("data"), attributes) - - yield "" + if 'data' in obj: + data = _get_data_object_for_encoding(obj.get('data')) + yield from data.encode_data(obj.get('data'), attributes) + yield '' # ============================================================================= # BASIC INTERFACE ============================================================= def load(fp, encode_nominal=False, return_type=DENSE): - """Load a file-like object containing the ARFF document and convert it into + '''Load a file-like object containing the ARFF document and convert it into a Python object. :param fp: a file-like object. @@ -1095,13 +1055,13 @@ def load(fp, encode_nominal=False, return_type=DENSE): Consult the sections on `working with sparse data`_ and `loading progressively`_. :return: a dictionary. - """ + ''' decoder = ArffDecoder() - return decoder.decode(fp, encode_nominal=encode_nominal, return_type=return_type) - + return decoder.decode(fp, encode_nominal=encode_nominal, + return_type=return_type) def loads(s, encode_nominal=False, return_type=DENSE): - """Convert a string instance containing the ARFF document into a Python + '''Convert a string instance containing the ARFF document into a Python object. :param s: a string object. @@ -1113,38 +1073,35 @@ def loads(s, encode_nominal=False, return_type=DENSE): Consult the sections on `working with sparse data`_ and `loading progressively`_. :return: a dictionary. 
- """ + ''' decoder = ArffDecoder() - return decoder.decode(s, encode_nominal=encode_nominal, return_type=return_type) - + return decoder.decode(s, encode_nominal=encode_nominal, + return_type=return_type) def dump(obj, fp): - """Serialize an object representing the ARFF document to a given file-like + '''Serialize an object representing the ARFF document to a given file-like object. :param obj: a dictionary. :param fp: a file-like object. - """ + ''' encoder = ArffEncoder() generator = encoder.iter_encode(obj) last_row = next(generator) for row in generator: - fp.write(last_row + "\n") + fp.write(last_row + '\n') last_row = row fp.write(last_row) return fp - def dumps(obj): - """Serialize an object representing the ARFF document, returning a string. + '''Serialize an object representing the ARFF document, returning a string. :param obj: a dictionary. :return: a string with the ARFF document. - """ + ''' encoder = ArffEncoder() return encoder.encode(obj) - - # ============================================================================= diff --git a/sklearn/externals/_lobpcg.py b/sklearn/externals/_lobpcg.py index cb96d8b296a3a..1de3900b3f89c 100644 --- a/sklearn/externals/_lobpcg.py +++ b/sklearn/externals/_lobpcg.py @@ -21,11 +21,11 @@ """ import warnings - import numpy as np -from numpy import block as bmat -from scipy.linalg import LinAlgError, cho_factor, cho_solve, cholesky, eigh, inv +from scipy.linalg import (inv, eigh, cho_factor, cho_solve, + cholesky, LinAlgError) from scipy.sparse.linalg import aslinearoperator +from numpy import block as bmat __all__ = ["lobpcg"] @@ -42,12 +42,10 @@ def _report_nonhermitian(M, name): tol = max(tol, tol * norm(M, 1)) if nmd > tol: warnings.warn( - f"Matrix {name} of the type {M.dtype} is not Hermitian: " - f"condition: {nmd} < {tol} fails.", - UserWarning, - stacklevel=4, - ) - + f"Matrix {name} of the type {M.dtype} is not Hermitian: " + f"condition: {nmd} < {tol} fails.", + UserWarning, stacklevel=4 + ) def _as2d(ar): """ @@ -123,7 +121,7 @@ def _get_indx(_lambda, num, largest): """Get `num` indices into `_lambda` depending on `largest` option.""" ii = np.argsort(_lambda) if largest: - ii = ii[: -num - 1 : -1] + ii = ii[:-num - 1:-1] else: ii = ii[:num] @@ -343,9 +341,8 @@ def lobpcg( warnings.warn( f"The problem size {n} minus the constraints size {sizeY} " f"is too small relative to the block size {sizeX}. " - "Using a dense eigensolver instead of LOBPCG.", - UserWarning, - stacklevel=2, + f"Using a dense eigensolver instead of LOBPCG.", + UserWarning, stacklevel=2 ) sizeX = min(sizeX, n) @@ -364,7 +361,10 @@ def lobpcg( A_dense = A(np.eye(n, dtype=A.dtype)) B_dense = None if B is None else B(np.eye(n, dtype=B.dtype)) - vals, vecs = eigh(A_dense, B_dense, eigvals=eigvals, check_finite=False) + vals, vecs = eigh(A_dense, + B_dense, + eigvals=eigvals, + check_finite=False) if largest: # Reverse order to be compatible with eigs() in 'LM' mode. vals = vals[::-1] @@ -438,7 +438,7 @@ def lobpcg( while iterationNumber < maxiter: iterationNumber += 1 if verbosityLevel > 0: - print("-" * 50) + print("-"*50) print(f"iteration {iterationNumber}") if B is not None: @@ -488,17 +488,22 @@ def lobpcg( ## # Apply constraints to the preconditioned residuals. if blockVectorY is not None: - _applyConstraints(activeBlockVectorR, gramYBY, blockVectorBY, blockVectorY) + _applyConstraints(activeBlockVectorR, + gramYBY, + blockVectorBY, + blockVectorY) ## # B-orthogonalize the preconditioned residuals to X. 
if B is not None: activeBlockVectorR = activeBlockVectorR - ( - blockVectorX @ (blockVectorBX.T.conj() @ activeBlockVectorR) + blockVectorX @ + (blockVectorBX.T.conj() @ activeBlockVectorR) ) else: activeBlockVectorR = activeBlockVectorR - ( - blockVectorX @ (blockVectorX.T.conj() @ activeBlockVectorR) + blockVectorX @ + (blockVectorX.T.conj() @ activeBlockVectorR) ) ## @@ -511,8 +516,7 @@ def lobpcg( f"Failed at iteration {iterationNumber} with accuracies " f"{residualNorms}\n not reaching the requested " f"tolerance {residualTolerance}.", - UserWarning, - stacklevel=2, + UserWarning, stacklevel=2 ) break activeBlockVectorAR = A(activeBlockVectorR) @@ -592,7 +596,8 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): gramRBP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP) if explicitGramFlag: gramPAP = (gramPAP + gramPAP.T.conj()) / 2 - gramPBP = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorBP) + gramPBP = np.dot(activeBlockVectorP.T.conj(), + activeBlockVectorBP) else: gramPBP = ident @@ -614,7 +619,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): _handle_gramA_gramB_verbosity(gramA, gramB) try: - _lambda, eigBlockVector = eigh(gramA, gramB, check_finite=False) + _lambda, eigBlockVector = eigh(gramA, + gramB, + check_finite=False) except LinAlgError: # try again after dropping the direction vectors P from RR restart = True @@ -626,7 +633,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): _handle_gramA_gramB_verbosity(gramA, gramB) try: - _lambda, eigBlockVector = eigh(gramA, gramB, check_finite=False) + _lambda, eigBlockVector = eigh(gramA, + gramB, + check_finite=False) except LinAlgError as e: raise ValueError("eigh has failed in lobpcg iterations") from e @@ -655,8 +664,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): if B is not None: if not restart: eigBlockVectorX = eigBlockVector[:sizeX] - eigBlockVectorR = eigBlockVector[sizeX : sizeX + currentBlockSize] - eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize :] + eigBlockVectorR = eigBlockVector[sizeX: + sizeX + currentBlockSize] + eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:] pp = np.dot(activeBlockVectorR, eigBlockVectorR) pp += np.dot(activeBlockVectorP, eigBlockVectorP) @@ -688,8 +698,9 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): else: if not restart: eigBlockVectorX = eigBlockVector[:sizeX] - eigBlockVectorR = eigBlockVector[sizeX : sizeX + currentBlockSize] - eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize :] + eigBlockVectorR = eigBlockVector[sizeX: + sizeX + currentBlockSize] + eigBlockVectorP = eigBlockVector[sizeX + currentBlockSize:] pp = np.dot(activeBlockVectorR, eigBlockVectorR) pp += np.dot(activeBlockVectorP, eigBlockVectorP) @@ -728,8 +739,7 @@ def _handle_gramA_gramB_verbosity(gramA, gramB): f"Exited at iteration {iterationNumber} with accuracies \n" f"{residualNorms}\n" f"not reaching the requested tolerance {residualTolerance}.", - UserWarning, - stacklevel=2, + UserWarning, stacklevel=2 ) # Future work: Need to add Postprocessing here: diff --git a/sklearn/externals/_numpy_compiler_patch.py b/sklearn/externals/_numpy_compiler_patch.py index 7bee1b66c83f9..a424d8e99a8ef 100644 --- a/sklearn/externals/_numpy_compiler_patch.py +++ b/sklearn/externals/_numpy_compiler_patch.py @@ -29,9 +29,9 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 import os
-import re
-import subprocess
 import sys
+import subprocess
+import re
 from distutils.errors import DistutilsExecError

 from numpy.distutils import log
diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py
index f4db85303f4b6..a9c1496181b3b 100644
--- a/sklearn/feature_extraction/__init__.py
+++ b/sklearn/feature_extraction/__init__.py
@@ -4,10 +4,10 @@
 images.
 """

-from . import text
 from ._dict_vectorizer import DictVectorizer
 from ._hash import FeatureHasher
-from .image import grid_to_graph, img_to_graph
+from .image import img_to_graph, grid_to_graph
+from . import text

 __all__ = [
     "DictVectorizer",
diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py
index b4afb797bd240..e04c409027bda 100644
--- a/sklearn/feature_extraction/_dict_vectorizer.py
+++ b/sklearn/feature_extraction/_dict_vectorizer.py
@@ -3,9 +3,9 @@
 # License: BSD 3 clause

 from array import array
-from collections.abc import Iterable, Mapping
-from numbers import Number
+from collections.abc import Mapping, Iterable
 from operator import itemgetter
+from numbers import Number

 import numpy as np
 import scipy.sparse as sp
diff --git a/sklearn/feature_extraction/_hashing_fast.pyx b/sklearn/feature_extraction/_hashing_fast.pyx
index 0a64f94442d88..48dbd928a03d3 100644
--- a/sklearn/feature_extraction/_hashing_fast.pyx
+++ b/sklearn/feature_extraction/_hashing_fast.pyx
@@ -1,19 +1,17 @@
 # Author: Lars Buitinck
 # License: BSD 3 clause

-import array
 import sys
-
+import array
 cimport cython
-cimport numpy as cnp
 from libc.stdlib cimport abs
 from libcpp.vector cimport vector
+cimport numpy as cnp

 import numpy as np
-
 from ..utils._typedefs cimport INT32TYPE_t, INT64TYPE_t
-from ..utils._vector_sentinel cimport vector_to_nd_array
 from ..utils.murmurhash cimport murmurhash3_bytes_s32
+from ..utils._vector_sentinel cimport vector_to_nd_array

 cnp.import_array()
diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index 2bbe1f432d10b..9c330f593dbdc 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -9,15 +9,14 @@
 # Vlad Niculae
 # License: BSD 3 clause

-import numbers
 from itertools import product
-
+import numbers
 import numpy as np
-from numpy.lib.stride_tricks import as_strided
 from scipy import sparse
+from numpy.lib.stride_tricks import as_strided

-from ..base import BaseEstimator
 from ..utils import check_array, check_random_state
+from ..base import BaseEstimator

 __all__ = [
     "PatchExtractor",
diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py
index 119ed5a98af02..ebdb0f084e67d 100644
--- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py
+++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py
@@ -3,11 +3,12 @@
 # License: BSD 3 clause

 from random import Random
-
 import numpy as np
-import pytest
 import scipy.sparse as sp
-from numpy.testing import assert_allclose, assert_array_equal
+from numpy.testing import assert_array_equal
+from numpy.testing import assert_allclose
+
+import pytest

 from sklearn.feature_extraction import DictVectorizer
 from sklearn.feature_selection import SelectKBest, chi2
diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py
index 2bdfdb75e405b..79ec2922e16d8 100644
--- a/sklearn/feature_extraction/tests/test_feature_hasher.py
+++ b/sklearn/feature_extraction/tests/test_feature_hasher.py
@@ -1,6 +1,6 @@
 import numpy as np
-import pytest
 from numpy.testing import assert_array_equal
+import pytest

 from sklearn.feature_extraction import FeatureHasher
 from sklearn.feature_extraction._hashing_fast import transform as _hashing_transform
diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py
index 9d900f584aa97..41348a3535693 100644
--- a/sklearn/feature_extraction/tests/test_image.py
+++ b/sklearn/feature_extraction/tests/test_image.py
@@ -3,18 +3,18 @@
 # License: BSD 3 clause

 import numpy as np
-import pytest
 import scipy as sp
 from scipy import ndimage
 from scipy.sparse.csgraph import connected_components
+import pytest

 from sklearn.feature_extraction.image import (
-    PatchExtractor,
-    _extract_patches,
-    extract_patches_2d,
-    grid_to_graph,
     img_to_graph,
+    grid_to_graph,
+    extract_patches_2d,
     reconstruct_from_patches_2d,
+    PatchExtractor,
+    _extract_patches,
 )
diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py
index 6d4a438b437ee..b46958c36002e 100644
--- a/sklearn/feature_extraction/tests/test_text.py
+++ b/sklearn/feature_extraction/tests/test_text.py
@@ -1,37 +1,43 @@
-import pickle
-import re
-import warnings
-from collections import defaultdict
 from collections.abc import Mapping
-from functools import partial
-from io import StringIO
+import re

-import numpy as np
 import pytest
-from numpy.testing import assert_array_almost_equal, assert_array_equal
+import warnings
 from scipy import sparse

-from sklearn.base import clone
-from sklearn.feature_extraction.text import (
-    ENGLISH_STOP_WORDS,
-    CountVectorizer,
-    HashingVectorizer,
-    TfidfTransformer,
-    TfidfVectorizer,
-    strip_accents_ascii,
-    strip_accents_unicode,
-    strip_tags,
-)
-from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
+from sklearn.feature_extraction.text import strip_tags
+from sklearn.feature_extraction.text import strip_accents_unicode
+from sklearn.feature_extraction.text import strip_accents_ascii
+
+from sklearn.feature_extraction.text import HashingVectorizer
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.feature_extraction.text import TfidfTransformer
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
+
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline
 from sklearn.svm import LinearSVC
+
+from sklearn.base import clone
+
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_array_equal
 from sklearn.utils import IS_PYPY
 from sklearn.utils._testing import (
-    assert_allclose_dense_sparse,
     assert_almost_equal,
     fails_if_pypy,
+    assert_allclose_dense_sparse,
     skip_if_32bit,
 )
+from collections import defaultdict
+from functools import partial
+import pickle
+from io import StringIO

 JUNK_FOOD_DOCS = (
     "the pizza pizza beer copyright",
diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 46deab05c25fa..b565aeadc53c8 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -12,26 +12,27 @@
 """

 import array
-import numbers
-import re
-import unicodedata
-import warnings
 from collections import defaultdict
 from collections.abc import Mapping
 from functools import partial
+import numbers
 from operator import itemgetter
+import re
+import unicodedata
+import warnings

 import numpy as np
 import scipy.sparse as sp

 from ..base import BaseEstimator, TransformerMixin, _OneToOneFeatureMixin
-from ..exceptions import NotFittedError
 from ..preprocessing import normalize
-from ..utils import _IS_32BIT
-from ..utils.deprecation import deprecated
-from ..utils.validation import FLOAT_DTYPES, check_array, check_is_fitted, check_scalar
 from ._hash import FeatureHasher
 from ._stop_words import ENGLISH_STOP_WORDS
+from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES, check_scalar
+from ..utils.deprecation import deprecated
+from ..utils import _IS_32BIT
+from ..exceptions import NotFittedError
+

 __all__ = [
     "HashingVectorizer",
diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py
index 4fbc631155078..ce5fbc10ee459 100644
--- a/sklearn/feature_selection/__init__.py
+++ b/sklearn/feature_selection/__init__.py
@@ -4,25 +4,31 @@
 recursive feature elimination algorithm.
 """

-from ._base import SelectorMixin
+from ._univariate_selection import chi2
+from ._univariate_selection import f_classif
+from ._univariate_selection import f_oneway
+from ._univariate_selection import f_regression
+from ._univariate_selection import r_regression
+from ._univariate_selection import SelectPercentile
+from ._univariate_selection import SelectKBest
+from ._univariate_selection import SelectFpr
+from ._univariate_selection import SelectFdr
+from ._univariate_selection import SelectFwe
+from ._univariate_selection import GenericUnivariateSelect
+
+from ._variance_threshold import VarianceThreshold
+
+from ._rfe import RFE
+from ._rfe import RFECV
+
 from ._from_model import SelectFromModel
-from ._mutual_info import mutual_info_classif, mutual_info_regression
-from ._rfe import RFE, RFECV
+
 from ._sequential import SequentialFeatureSelector
-from ._univariate_selection import (
-    GenericUnivariateSelect,
-    SelectFdr,
-    SelectFpr,
-    SelectFwe,
-    SelectKBest,
-    SelectPercentile,
-    chi2,
-    f_classif,
-    f_oneway,
-    f_regression,
-    r_regression,
-)
-from ._variance_threshold import VarianceThreshold
+
+from ._mutual_info import mutual_info_regression, mutual_info_classif
+
+from ._base import SelectorMixin
+

 __all__ = [
     "GenericUnivariateSelect",
diff --git a/sklearn/feature_selection/_base.py b/sklearn/feature_selection/_base.py
index 50ab88540aa1d..e306c102cdd53 100644
--- a/sklearn/feature_selection/_base.py
+++ b/sklearn/feature_selection/_base.py
@@ -8,11 +8,15 @@
 from operator import attrgetter

 import numpy as np
-from scipy.sparse import csc_matrix, issparse
+from scipy.sparse import issparse, csc_matrix

 from ..base import TransformerMixin
 from ..cross_decomposition._pls import _PLS
-from ..utils import check_array, safe_mask, safe_sqr
+from ..utils import (
+    check_array,
+    safe_mask,
+    safe_sqr,
+)
 from ..utils._tags import _safe_tags
 from ..utils.validation import _check_feature_names_in
diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index 9f163dcdc419d..0c41c66fbef1f 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -1,17 +1,19 @@
 # Authors: Gilles Louppe, Mathieu Blondel, Maheshakya Wijewardena
 # License: BSD 3 clause

-import numbers
 from copy import deepcopy

 import numpy as np
+import numbers

-from ..base import BaseEstimator, MetaEstimatorMixin, clone
-from ..exceptions import NotFittedError
+from ._base import SelectorMixin
+from ._base import _get_feature_importances
+from ..base import BaseEstimator, clone, MetaEstimatorMixin
 from ..utils._tags import _safe_tags
+from ..utils.validation import check_is_fitted, check_scalar, _num_features
+
+from ..exceptions import NotFittedError
 from ..utils.metaestimators import available_if
-from ..utils.validation import _num_features, check_is_fitted, check_scalar
-from ._base import SelectorMixin, _get_feature_importances


 def _calculate_threshold(estimator, importances, threshold):
diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py
index c5529c0276783..47db2601c44c0 100644
--- a/sklearn/feature_selection/_mutual_info.py
+++ b/sklearn/feature_selection/_mutual_info.py
@@ -6,11 +6,11 @@
 from scipy.special import digamma

 from ..metrics.cluster import mutual_info_score
-from ..neighbors import KDTree, NearestNeighbors
+from ..neighbors import NearestNeighbors, KDTree
 from ..preprocessing import scale
 from ..utils import check_random_state
-from ..utils.multiclass import check_classification_targets
 from ..utils.validation import check_array, check_X_y
+from ..utils.multiclass import check_classification_targets


 def _compute_mi_cc(x, y, n_neighbors):
diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py
index 546660a14fee8..0f82e1775ee15 100644
--- a/sklearn/feature_selection/_rfe.py
+++ b/sklearn/feature_selection/_rfe.py
@@ -6,22 +6,26 @@

 """Recursive feature elimination for feature ranking"""

-import numbers
-
 import numpy as np
-
+import numbers
 from joblib import Parallel, effective_n_jobs

-from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier
-from ..metrics import check_scoring
-from ..model_selection import check_cv
-from ..model_selection._validation import _score
+
+from ..utils.metaestimators import available_if
+from ..utils.metaestimators import _safe_split
 from ..utils._tags import _safe_tags
-from ..utils.deprecation import deprecated
-from ..utils.fixes import delayed
-from ..utils.metaestimators import _safe_split, available_if
 from ..utils.validation import check_is_fitted
-from ._base import SelectorMixin, _get_feature_importances
+from ..utils.fixes import delayed
+from ..utils.deprecation import deprecated
+from ..base import BaseEstimator
+from ..base import MetaEstimatorMixin
+from ..base import clone
+from ..base import is_classifier
+from ..model_selection import check_cv
+from ..model_selection._validation import _score
+from ..metrics import check_scoring
+from ._base import SelectorMixin
+from ._base import _get_feature_importances


 def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):
diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py
index 6b5886aba5ff0..ed802c46b815c 100644
--- a/sklearn/feature_selection/_sequential.py
+++ b/sklearn/feature_selection/_sequential.py
@@ -2,15 +2,16 @@
 Sequential feature selection
 """
 import numbers
-import warnings

 import numpy as np
+import warnings
+
+from ._base import SelectorMixin

 from ..base import BaseEstimator, MetaEstimatorMixin, clone
-from ..model_selection import cross_val_score
 from ..utils._tags import _safe_tags
 from ..utils.validation import check_is_fitted
-from ._base import SelectorMixin
+from ..model_selection import cross_val_score


 class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py
index 5356cb136dc1e..7754ea3bea7f4 100644
--- a/sklearn/feature_selection/_univariate_selection.py
+++ b/sklearn/feature_selection/_univariate_selection.py
@@ -5,16 +5,16 @@
 # License: BSD 3 clause


+import numpy as np
 import warnings

-import numpy as np
 from scipy import special, stats
 from scipy.sparse import issparse

 from ..base import BaseEstimator
 from ..preprocessing import LabelBinarizer
-from ..utils import as_float_array, check_array, check_X_y, safe_mask, safe_sqr
-from ..utils.extmath import row_norms, safe_sparse_dot
+from ..utils import as_float_array, check_array, check_X_y, safe_sqr, safe_mask
+from ..utils.extmath import safe_sparse_dot, row_norms
 from ..utils.validation import check_is_fitted
 from ._base import SelectorMixin
diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py
index 2b3b1def8b41f..7f274b3a308ef 100644
--- a/sklearn/feature_selection/_variance_threshold.py
+++ b/sklearn/feature_selection/_variance_threshold.py
@@ -2,11 +2,10 @@
 # License: 3-clause BSD

 import numpy as np
-
 from ..base import BaseEstimator
+from ._base import SelectorMixin
 from ..utils.sparsefuncs import mean_variance_axis, min_max_axis
 from ..utils.validation import check_is_fitted
-from ._base import SelectorMixin


 class VarianceThreshold(SelectorMixin, BaseEstimator):
diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py
index 4a45e185a62bc..9df0749427976 100644
--- a/sklearn/feature_selection/tests/test_base.py
+++ b/sklearn/feature_selection/tests/test_base.py
@@ -1,8 +1,9 @@
 import numpy as np
 import pytest
-from numpy.testing import assert_array_equal
 from scipy import sparse as sp

+from numpy.testing import assert_array_equal
+
 from sklearn.base import BaseEstimator
 from sklearn.feature_selection._base import SelectorMixin
 from sklearn.utils import check_array
diff --git a/sklearn/feature_selection/tests/test_chi2.py b/sklearn/feature_selection/tests/test_chi2.py
index 4fdc652a998a9..d7d830459e455 100644
--- a/sklearn/feature_selection/tests/test_chi2.py
+++ b/sklearn/feature_selection/tests/test_chi2.py
@@ -7,12 +7,13 @@

 import numpy as np
 import pytest
-import scipy.stats
 from scipy.sparse import coo_matrix, csr_matrix
+import scipy.stats

 from sklearn.feature_selection import SelectKBest, chi2
 from sklearn.feature_selection._univariate_selection import _chisquare
-from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal

 # Feature 0 is highly informative for class 1;
 # feature 1 is the same everywhere;
diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py
index 9227261ee1241..815d8a35201d5 100644
--- a/sklearn/feature_selection/tests/test_feature_select.py
+++ b/sklearn/feature_selection/tests/test_feature_select.py
@@ -3,36 +3,35 @@
 """
 import itertools
 import warnings
-
 import numpy as np
-import pytest
 from numpy.testing import assert_allclose
-from scipy import sparse, stats
+from scipy import stats, sparse
+
+import pytest
+
+from sklearn.utils._testing import assert_almost_equal, _convert_container
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils import safe_mask

 from sklearn.datasets import make_classification, make_regression
 from sklearn.feature_selection import (
-    GenericUnivariateSelect,
-    SelectFdr,
-    SelectFpr,
-    SelectFwe,
-    SelectKBest,
-    SelectPercentile,
     chi2,
     f_classif,
     f_oneway,
     f_regression,
+    GenericUnivariateSelect,
     mutual_info_classif,
     mutual_info_regression,
     r_regression,
+    SelectPercentile,
+    SelectKBest,
+    SelectFpr,
+    SelectFdr,
+    SelectFwe,
 )
-from sklearn.utils import safe_mask
-from sklearn.utils._testing import (
-    _convert_container,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
+

 ##############################################################################
 # Test the score functions
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 830569dd8ec66..de45d9e0ab6a4 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -1,33 +1,27 @@
 import re
+import pytest
+import numpy as np
 import warnings
 from unittest.mock import Mock

-import numpy as np
-import pytest
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import skip_if_32bit
+from sklearn.utils._testing import MinimalClassifier

 from sklearn import datasets
-from sklearn.base import BaseEstimator
 from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression
 from sklearn.datasets import make_friedman1
-from sklearn.decomposition import PCA
-from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
 from sklearn.exceptions import NotFittedError
+from sklearn.linear_model import LogisticRegression, SGDClassifier, Lasso
+from sklearn.svm import LinearSVC
 from sklearn.feature_selection import SelectFromModel
-from sklearn.linear_model import (
-    Lasso,
-    LogisticRegression,
-    PassiveAggressiveClassifier,
-    SGDClassifier,
-)
+from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
+from sklearn.linear_model import PassiveAggressiveClassifier
+from sklearn.base import BaseEstimator
 from sklearn.pipeline import make_pipeline
-from sklearn.svm import LinearSVC
-from sklearn.utils._testing import (
-    MinimalClassifier,
-    assert_allclose,
-    assert_array_almost_equal,
-    assert_array_equal,
-    skip_if_32bit,
-)
+from sklearn.decomposition import PCA


 class NaNTag(BaseEstimator):
diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py
index cf98d5d54e83b..af2b733efd62d 100644
--- a/sklearn/feature_selection/tests/test_mutual_info.py
+++ b/sklearn/feature_selection/tests/test_mutual_info.py
@@ -2,10 +2,13 @@
 import pytest
 from scipy.sparse import csr_matrix

-from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
-from sklearn.feature_selection._mutual_info import _compute_mi
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import assert_allclose, assert_array_equal
+from sklearn.utils._testing import (
+    assert_array_equal,
+    assert_allclose,
+)
+from sklearn.feature_selection._mutual_info import _compute_mi
+from sklearn.feature_selection import mutual_info_regression, mutual_info_classif


 def test_compute_mi_dd():
diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py
index ad1420732a3c5..75b84a8d2cbb1 100644
--- a/sklearn/feature_selection/tests/test_rfe.py
+++ b/sklearn/feature_selection/tests/test_rfe.py
@@ -4,26 +4,31 @@

 from operator import attrgetter

-import numpy as np
 import pytest
-from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal
+import numpy as np
+from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose
 from scipy import sparse

 from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.compose import TransformedTargetRegressor
-from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression
-from sklearn.datasets import load_iris, make_friedman1
-from sklearn.ensemble import RandomForestClassifier
+from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA
 from sklearn.feature_selection import RFE, RFECV
+from sklearn.datasets import load_iris, make_friedman1
+from sklearn.metrics import zero_one_loss
+from sklearn.svm import SVC, SVR, LinearSVR
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import get_scorer, make_scorer, zero_one_loss
-from sklearn.model_selection import GroupKFold, cross_val_score
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import GroupKFold
+from sklearn.compose import TransformedTargetRegressor
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
-from sklearn.svm import SVC, SVR, LinearSVR
+
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import ignore_warnings
+from sklearn.metrics import make_scorer
+from sklearn.metrics import get_scorer
+

 class MockClassifier:
     """
@@ -298,8 +303,8 @@ def test_rfecv_mockclassifier():

 def test_rfecv_verbose_output():
     # Check verbose=1 is producing an output.
-    import sys
     from io import StringIO
+    import sys

     sys.stdout = StringIO()
diff --git a/sklearn/feature_selection/tests/test_sequential.py b/sklearn/feature_selection/tests/test_sequential.py
index 3196dac0f1192..3daac62e19922 100644
--- a/sklearn/feature_selection/tests/test_sequential.py
+++ b/sklearn/feature_selection/tests/test_sequential.py
@@ -1,16 +1,16 @@
-import numpy as np
 import pytest
 import scipy
+import numpy as np
 from numpy.testing import assert_array_equal

-from sklearn.cluster import KMeans
-from sklearn.datasets import make_blobs, make_regression
-from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import make_pipeline
 from sklearn.feature_selection import SequentialFeatureSelector
+from sklearn.datasets import make_regression, make_blobs
 from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import HistGradientBoostingRegressor
 from sklearn.model_selection import cross_val_score
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import StandardScaler
+from sklearn.cluster import KMeans


 @pytest.mark.parametrize("n_features_to_select", (0, 5, 0.0, -1, 1.1))
diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py
index fe4cde7dcdb93..55d20e9675654 100644
--- a/sklearn/feature_selection/tests/test_variance_threshold.py
+++ b/sklearn/feature_selection/tests/test_variance_threshold.py
@@ -1,9 +1,11 @@
 import numpy as np
 import pytest
+
+from sklearn.utils._testing import assert_array_equal
+
 from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix

 from sklearn.feature_selection import VarianceThreshold
-from sklearn.utils._testing import assert_array_equal

 data = [[0, 1, 2, 3, 4], [0, 2, 2, 3, 5], [1, 1, 2, 4, 0]]
diff --git a/sklearn/gaussian_process/__init__.py b/sklearn/gaussian_process/__init__.py
index bc0d902b45b18..719208b7951be 100644
--- a/sklearn/gaussian_process/__init__.py
+++ b/sklearn/gaussian_process/__init__.py
@@ -8,8 +8,9 @@
 based regression and classification.
 """

-from . import kernels
-from ._gpc import GaussianProcessClassifier
 from ._gpr import GaussianProcessRegressor
+from ._gpc import GaussianProcessClassifier
+from . import kernels
+

 __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"]
diff --git a/sklearn/gaussian_process/_gpc.py b/sklearn/gaussian_process/_gpc.py
index 20ce80c9d47d4..061aa95287500 100644
--- a/sklearn/gaussian_process/_gpc.py
+++ b/sklearn/gaussian_process/_gpc.py
@@ -7,18 +7,18 @@
 from operator import itemgetter

 import numpy as np
+from scipy.linalg import cholesky, cho_solve, solve
 import scipy.optimize
-from scipy.linalg import cho_solve, cholesky, solve
 from scipy.special import erf, expit

 from ..base import BaseEstimator, ClassifierMixin, clone
-from ..multiclass import OneVsOneClassifier, OneVsRestClassifier
-from ..preprocessing import LabelEncoder
+from .kernels import RBF, CompoundKernel, ConstantKernel as C
+from ..utils.validation import check_is_fitted
 from ..utils import check_random_state
 from ..utils.optimize import _check_optimize_result
-from ..utils.validation import check_is_fitted
-from .kernels import RBF, CompoundKernel
-from .kernels import ConstantKernel as C
+from ..preprocessing import LabelEncoder
+from ..multiclass import OneVsRestClassifier, OneVsOneClassifier
+

 # Values required for approximating the logistic sigmoid by
 # error functions. coefs are obtained via:
diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py
index bf68636a334f9..c7d8db7b63702 100644
--- a/sklearn/gaussian_process/_gpr.py
+++ b/sklearn/gaussian_process/_gpr.py
@@ -8,15 +8,15 @@
 from operator import itemgetter

 import numpy as np
+from scipy.linalg import cholesky, cho_solve, solve_triangular
 import scipy.optimize
-from scipy.linalg import cho_solve, cholesky, solve_triangular

-from ..base import BaseEstimator, MultiOutputMixin, RegressorMixin, clone
+from ..base import BaseEstimator, RegressorMixin, clone
+from ..base import MultiOutputMixin
+from .kernels import RBF, ConstantKernel as C
 from ..preprocessing._data import _handle_zeros_in_scale
 from ..utils import check_random_state
 from ..utils.optimize import _check_optimize_result
-from .kernels import RBF
-from .kernels import ConstantKernel as C

 GPR_CHOLESKY_LOWER = True
diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py
index 2d9bcc8974558..4e36dfa7add42 100644
--- a/sklearn/gaussian_process/kernels.py
+++ b/sklearn/gaussian_process/kernels.py
@@ -19,20 +19,21 @@
 # Note: this module is strongly inspired by the kernel module of the george
 # package.

-import math
-import warnings
 from abc import ABCMeta, abstractmethod
 from collections import namedtuple
+import math
 from inspect import signature

 import numpy as np
-from scipy.spatial.distance import cdist, pdist, squareform
-from scipy.special import gamma, kv
+from scipy.special import kv, gamma
+from scipy.spatial.distance import pdist, cdist, squareform

-from ..base import clone
-from ..exceptions import ConvergenceWarning
 from ..metrics.pairwise import pairwise_kernels
+from ..base import clone
 from ..utils.validation import _num_samples
+from ..exceptions import ConvergenceWarning
+
+import warnings


 def _check_length_scale(X, length_scale):
diff --git a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py
index 4667329aff9b8..ad81890680168 100644
--- a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py
+++ b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py
@@ -1,12 +1,8 @@
+from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
+from sklearn.gaussian_process.kernels import GenericKernelMixin
+from sklearn.gaussian_process.kernels import StationaryKernelMixin
 import numpy as np
-
 from sklearn.base import clone
-from sklearn.gaussian_process.kernels import (
-    GenericKernelMixin,
-    Hyperparameter,
-    Kernel,
-    StationaryKernelMixin,
-)


 class MiniSeqKernel(GenericKernelMixin, StationaryKernelMixin, Kernel):
diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py
index 777e61d544f23..2173f77c161c1 100644
--- a/sklearn/gaussian_process/tests/test_gpc.py
+++ b/sklearn/gaussian_process/tests/test_gpc.py
@@ -4,17 +4,22 @@
 # License: BSD 3 clause

 import warnings
-
 import numpy as np
-import pytest
+
 from scipy.optimize import approx_fprime

-from sklearn.exceptions import ConvergenceWarning
+import pytest
+
 from sklearn.gaussian_process import GaussianProcessClassifier
-from sklearn.gaussian_process.kernels import RBF, CompoundKernel
-from sklearn.gaussian_process.kernels import ConstantKernel as C
-from sklearn.gaussian_process.kernels import WhiteKernel
+from sklearn.gaussian_process.kernels import (
+    RBF,
+    CompoundKernel,
+    ConstantKernel as C,
+    WhiteKernel,
+)
 from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel
+from sklearn.exceptions import ConvergenceWarning
+
 from sklearn.utils._testing import assert_almost_equal, assert_array_equal
diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py
index a7bb42e314966..a8e6eda3bf667 100644
--- a/sklearn/gaussian_process/tests/test_gpr.py
+++ b/sklearn/gaussian_process/tests/test_gpr.py
@@ -4,25 +4,25 @@
 # Modified by: Pete Green
 # License: BSD 3 clause

-import re
-import sys
 import warnings
-
+import sys
+import re
 import numpy as np
-
 from scipy.optimize import approx_fprime

-from sklearn.exceptions import ConvergenceWarning
+import pytest
+
 from sklearn.gaussian_process import GaussianProcessRegressor
-from sklearn.gaussian_process.kernels import RBF
-from sklearn.gaussian_process.kernels import ConstantKernel as C
-from sklearn.gaussian_process.kernels import DotProduct, ExpSineSquared, WhiteKernel
+from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, WhiteKernel
+from sklearn.gaussian_process.kernels import DotProduct, ExpSineSquared
 from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel
+from sklearn.exceptions import ConvergenceWarning

 from sklearn.utils._testing import (
-    assert_allclose,
+    assert_array_less,
     assert_almost_equal,
     assert_array_almost_equal,
-    assert_array_less,
+    assert_allclose,
 )
diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py
index 8248ad6ffba80..05dc682b13e9c 100644
--- a/sklearn/gaussian_process/tests/test_kernels.py
+++ b/sklearn/gaussian_process/tests/test_kernels.py
@@ -3,39 +3,41 @@
 # Author: Jan Hendrik Metzen
 # License: BSD 3 clause

+import pytest
+import numpy as np
 from inspect import signature

-import numpy as np
-import pytest
+from sklearn.gaussian_process.kernels import _approx_fprime

-from sklearn.base import clone
+from sklearn.metrics.pairwise import (
+    PAIRWISE_KERNEL_FUNCTIONS,
+    euclidean_distances,
+    pairwise_kernels,
+)
 from sklearn.gaussian_process.kernels import (
     RBF,
-    CompoundKernel,
-    ConstantKernel,
-    DotProduct,
-    Exponentiation,
-    ExpSineSquared,
-    KernelOperator,
     Matern,
-    PairwiseKernel,
     RationalQuadratic,
+    ExpSineSquared,
+    DotProduct,
+    ConstantKernel,
     WhiteKernel,
-    _approx_fprime,
-)
-from sklearn.metrics.pairwise import (
-    PAIRWISE_KERNEL_FUNCTIONS,
-    euclidean_distances,
-    pairwise_kernels,
+    PairwiseKernel,
+    KernelOperator,
+    Exponentiation,
+    CompoundKernel,
 )
+from sklearn.base import clone
+
 from sklearn.utils._testing import (
-    assert_allclose,
     assert_almost_equal,
-    assert_array_almost_equal,
     assert_array_equal,
+    assert_array_almost_equal,
+    assert_allclose,
     fails_if_pypy,
 )
+

 X = np.random.RandomState(0).normal(0, 1, (5, 2))
 Y = np.random.RandomState(0).normal(0, 1, (6, 2))
diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py
index 7ecf58c4f28b6..0c8a6f2c07a21 100644
--- a/sklearn/impute/_base.py
+++ b/sklearn/impute/_base.py
@@ -12,10 +12,13 @@
 from scipy import stats

 from ..base import BaseEstimator, TransformerMixin
-from ..utils import _is_pandas_na, is_scalar_nan
-from ..utils._mask import _get_mask
 from ..utils.sparsefuncs import _get_median
-from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted
+from ..utils.validation import check_is_fitted
+from ..utils.validation import FLOAT_DTYPES
+from ..utils.validation import _check_feature_names_in
+from ..utils._mask import _get_mask
+from ..utils import _is_pandas_na
+from ..utils import is_scalar_nan


 def _check_inputs_dtype(X, missing_values):
diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py
index 6802a5494a93f..f6c32a6818455 100644
--- a/sklearn/impute/_iterative.py
+++ b/sklearn/impute/_iterative.py
@@ -1,17 +1,22 @@
-import warnings
-from collections import namedtuple
 from time import time
+from collections import namedtuple
+import warnings

-import numpy as np
 from scipy import stats
+import numpy as np

 from ..base import clone
 from ..exceptions import ConvergenceWarning
 from ..preprocessing import normalize
-from ..utils import _safe_indexing, check_array, check_random_state, is_scalar_nan
+from ..utils import check_array, check_random_state, _safe_indexing, is_scalar_nan
+from ..utils.validation import FLOAT_DTYPES, check_is_fitted
+from ..utils.validation import _check_feature_names_in
 from ..utils._mask import _get_mask
-from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted
-from ._base import SimpleImputer, _BaseImputer, _check_inputs_dtype
+
+from ._base import _BaseImputer
+from ._base import SimpleImputer
+from ._base import _check_inputs_dtype
+

 _ImputerTriplet = namedtuple(
     "_ImputerTriplet", ["feat_idx", "neighbor_feat_idx", "estimator"]
diff --git a/sklearn/impute/_knn.py b/sklearn/impute/_knn.py
index da65eccb006b8..497bcfafb074a 100644
--- a/sklearn/impute/_knn.py
+++ b/sklearn/impute/_knn.py
@@ -4,13 +4,16 @@

 import numpy as np

+from ._base import _BaseImputer
+from ..utils.validation import FLOAT_DTYPES
 from ..metrics import pairwise_distances_chunked
 from ..metrics.pairwise import _NAN_METRICS
-from ..neighbors._base import _check_weights, _get_weights
+from ..neighbors._base import _get_weights
+from ..neighbors._base import _check_weights
 from ..utils import is_scalar_nan
 from ..utils._mask import _get_mask
-from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted
-from ._base import _BaseImputer
+from ..utils.validation import check_is_fitted
+from ..utils.validation import _check_feature_names_in


 class KNNImputer(_BaseImputer):
diff --git a/sklearn/impute/tests/test_base.py b/sklearn/impute/tests/test_base.py
index b841a440582fc..837575765f884 100644
--- a/sklearn/impute/tests/test_base.py
+++ b/sklearn/impute/tests/test_base.py
@@ -1,6 +1,7 @@
-import numpy as np
 import pytest

+import numpy as np
+
 from sklearn.impute._base import _BaseImputer
 from sklearn.utils._mask import _get_mask
diff --git a/sklearn/impute/tests/test_common.py b/sklearn/impute/tests/test_common.py
index ec6675e88941a..6d6fc3c649656 100644
--- a/sklearn/impute/tests/test_common.py
+++ b/sklearn/impute/tests/test_common.py
@@ -1,14 +1,18 @@
-import numpy as np
 import pytest
+
+import numpy as np
 from scipy import sparse

+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_allclose_dense_sparse
+from sklearn.utils._testing import assert_array_equal
+
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_allclose_dense_sparse,
-    assert_array_equal,
-)
+
+from sklearn.impute import IterativeImputer
+from sklearn.impute import KNNImputer
+from sklearn.impute import SimpleImputer
+

 IMPUTERS = [IterativeImputer(tol=0.1), KNNImputer(), SimpleImputer()]
 SPARSE_IMPUTERS = [SimpleImputer()]
diff --git a/sklearn/impute/tests/test_impute.py b/sklearn/impute/tests/test_impute.py
index 3b78ffe90a930..dc585571124b5 100644
--- a/sklearn/impute/tests/test_impute.py
+++ b/sklearn/impute/tests/test_impute.py
@@ -1,30 +1,32 @@
-import io
+import pytest
 import warnings

 import numpy as np
-import pytest
 from scipy import sparse
 from scipy.stats import kstest

-from sklearn import tree
-from sklearn.datasets import load_diabetes
-from sklearn.dummy import DummyRegressor
-from sklearn.exceptions import ConvergenceWarning
+import io
+
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_allclose_dense_sparse
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal

 # make IterativeImputer available
 from sklearn.experimental import enable_iterative_imputer  # noqa
-from sklearn.impute import IterativeImputer, MissingIndicator, SimpleImputer
-from sklearn.impute._base import _most_frequent
-from sklearn.linear_model import ARDRegression, BayesianRidge, RidgeCV
+
+from sklearn.datasets import load_diabetes
+from sklearn.impute import MissingIndicator
+from sklearn.impute import SimpleImputer, IterativeImputer
+from sklearn.dummy import DummyRegressor
+from sklearn.linear_model import BayesianRidge, ARDRegression, RidgeCV
+from sklearn.pipeline import Pipeline
+from sklearn.pipeline import make_union
 from sklearn.model_selection import GridSearchCV
-from sklearn.pipeline import Pipeline, make_union
+from sklearn import tree
 from sklearn.random_projection import _sparse_random_matrix
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_allclose_dense_sparse,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.impute._base import _most_frequent


 def _assert_array_equal_and_same_dtype(x, y):
diff --git a/sklearn/impute/tests/test_knn.py b/sklearn/impute/tests/test_knn.py
index 8c0ca9cad557d..098899bc1a0f1 100644
--- a/sklearn/impute/tests/test_knn.py
+++ b/sklearn/impute/tests/test_knn.py
@@ -3,7 +3,8 @@

 from sklearn import config_context
 from sklearn.impute import KNNImputer
-from sklearn.metrics.pairwise import nan_euclidean_distances, pairwise_distances
+from sklearn.metrics.pairwise import nan_euclidean_distances
+from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.neighbors import KNeighborsRegressor
 from sklearn.utils._testing import assert_allclose
diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py
index e9213d5abb77b..76c44ea81bbbe 100644
--- a/sklearn/inspection/__init__.py
+++ b/sklearn/inspection/__init__.py
@@ -1,10 +1,13 @@
 """The :mod:`sklearn.inspection` module includes tools for model inspection."""

-from ._partial_dependence import partial_dependence
 from ._permutation_importance import permutation_importance
 from ._plot.decision_boundary import DecisionBoundaryDisplay
-from ._plot.partial_dependence import PartialDependenceDisplay, plot_partial_dependence
+
+from ._partial_dependence import partial_dependence
+from ._plot.partial_dependence import plot_partial_dependence
+from ._plot.partial_dependence import PartialDependenceDisplay
+

 __all__ = [
     "partial_dependence",
diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py
index 1d2100391f02f..ebb7a11e16835 100644
--- a/sklearn/inspection/_partial_dependence.py
+++ b/sklearn/inspection/_partial_dependence.py
@@ -12,23 +12,22 @@
 from scipy.stats.mstats import mquantiles

 from ..base import is_classifier, is_regressor
+from ..utils.extmath import cartesian
+from ..utils import check_array
+from ..utils import check_matplotlib_support  # noqa
+from ..utils import _safe_indexing
+from ..utils import _determine_key_type
+from ..utils import _get_column_indices
+from ..utils.validation import check_is_fitted
+from ..utils import Bunch
+from ..tree import DecisionTreeRegressor
 from ..ensemble import RandomForestRegressor
+from ..exceptions import NotFittedError
 from ..ensemble._gb import BaseGradientBoosting
 from ..ensemble._hist_gradient_boosting.gradient_boosting import (
     BaseHistGradientBoosting,
 )
-from ..exceptions import NotFittedError
-from ..tree import DecisionTreeRegressor
-from ..utils import check_matplotlib_support  # noqa
-from ..utils import (
-    Bunch,
-    _determine_key_type,
-    _get_column_indices,
-    _safe_indexing,
-    check_array,
-)
-from ..utils.extmath import cartesian
-from ..utils.validation import check_is_fitted
+

 __all__ = [
     "partial_dependence",
diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py
index 808190591ad6e..204dcd9117c77 100644
--- a/sklearn/inspection/_permutation_importance.py
+++ b/sklearn/inspection/_permutation_importance.py
@@ -1,15 +1,15 @@
 """Permutation importance for estimators."""
 import numbers
-
 import numpy as np
-
 from joblib import Parallel

 from ..ensemble._bagging import _generate_indices
 from ..metrics import check_scoring
 from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer
 from ..model_selection._validation import _aggregate_score_dicts
-from ..utils import Bunch, _safe_indexing, check_array, check_random_state
+from ..utils import Bunch, _safe_indexing
+from ..utils import check_random_state
+from ..utils import check_array
 from ..utils.fixes import delayed
diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py
index 0bb22ce61ad4d..15e8e15e87569 100644
--- a/sklearn/inspection/_plot/decision_boundary.py
+++ b/sklearn/inspection/_plot/decision_boundary.py
@@ -2,10 +2,11 @@

 import numpy as np

-from ...base import is_regressor
 from ...preprocessing import LabelEncoder
-from ...utils import _safe_indexing, check_matplotlib_support
-from ...utils.validation import _is_arraylike_not_scalar, check_is_fitted
+from ...utils import check_matplotlib_support
+from ...utils import _safe_indexing
+from ...base import is_regressor
+from ...utils.validation import check_is_fitted, _is_arraylike_not_scalar


 def _check_boundary_response_method(estimator, response_method):
diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py
index 211880fa2e9c2..1ab0cd083bb58 100644
--- a/sklearn/inspection/_plot/partial_dependence.py
+++ b/sklearn/inspection/_plot/partial_dependence.py
@@ -6,14 +6,17 @@
 import numpy as np
 from scipy import sparse
 from scipy.stats.mstats import mquantiles
-
 from joblib import Parallel

+from .. import partial_dependence
 from ...base import is_regressor
+from ...utils import Bunch
+from ...utils import check_array
+from ...utils import deprecated
 from ...utils import check_matplotlib_support  # noqa
-from ...utils import Bunch, _safe_indexing, check_array, check_random_state, deprecated
+from ...utils import check_random_state
+from ...utils import _safe_indexing
 from ...utils.fixes import delayed
-from .. import partial_dependence


 @deprecated(
diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py
index 0f197e019f949..8981c9d5a5e83 100644
--- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py
+++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py
@@ -1,19 +1,21 @@
 import warnings

-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose

-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.datasets import (
-    load_iris,
-    make_classification,
-    make_multilabel_classification,
-)
+from sklearn.base import BaseEstimator
+from sklearn.base import ClassifierMixin
+from sklearn.datasets import make_classification
+from sklearn.linear_model import LogisticRegression
+from sklearn.datasets import load_iris
+from sklearn.datasets import make_multilabel_classification
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.tree import DecisionTreeClassifier
+
 from sklearn.inspection import DecisionBoundaryDisplay
 from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
-from sklearn.linear_model import LogisticRegression
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+

 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py
index 0bf6217f0707e..65f5ce83d7a06 100644
--- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py
+++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py
@@ -1,22 +1,22 @@
-import warnings
-
 import numpy as np
+from scipy.stats.mstats import mquantiles
+
 import pytest
 from numpy.testing import assert_allclose
-from scipy.stats.mstats import mquantiles
+import warnings

-from sklearn.datasets import (
-    load_diabetes,
-    load_iris,
-    make_classification,
-    make_regression,
-)
-from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
-from sklearn.inspection import PartialDependenceDisplay
-from sklearn.inspection import plot_partial_dependence as plot_partial_dependence_func
+from sklearn.datasets import load_diabetes
+from sklearn.datasets import load_iris
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.linear_model import LinearRegression
 from sklearn.utils._testing import _convert_container
+from sklearn.inspection import plot_partial_dependence as plot_partial_dependence_func
+from sklearn.inspection import PartialDependenceDisplay
+
+
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py
index 7bf47a850ce19..4e62f140c6953 100644
--- a/sklearn/inspection/tests/test_partial_dependence.py
+++ b/sklearn/inspection/tests/test_partial_dependence.py
@@ -6,39 +6,40 @@
 import pytest

 import sklearn
-from sklearn.base import BaseEstimator, ClassifierMixin, clone
-from sklearn.cluster import KMeans
-from sklearn.compose import make_column_transformer
-from sklearn.datasets import load_iris, make_classification, make_regression
-from sklearn.dummy import DummyClassifier
-from sklearn.ensemble import (
-    GradientBoostingClassifier,
-    GradientBoostingRegressor,
-    HistGradientBoostingClassifier,
-    HistGradientBoostingRegressor,
-    RandomForestRegressor,
-)
-from sklearn.exceptions import NotFittedError
 from sklearn.inspection import partial_dependence
 from sklearn.inspection._partial_dependence import (
     _grid_from_X,
     _partial_dependence_brute,
     _partial_dependence_recursion,
 )
-from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import LogisticRegression
+from sklearn.linear_model import MultiTaskLasso
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.datasets import load_iris
+from sklearn.datasets import make_classification, make_regression
+from sklearn.cluster import KMeans
+from sklearn.compose import make_column_transformer
 from sklearn.metrics import r2_score
+from sklearn.preprocessing import PolynomialFeatures
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import RobustScaler
+from sklearn.preprocessing import scale
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import (
-    PolynomialFeatures,
-    RobustScaler,
-    StandardScaler,
-    scale,
-)
-from sklearn.tree import DecisionTreeRegressor
-from sklearn.tree.tests.test_tree import assert_is_subtree
+from sklearn.dummy import DummyClassifier
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
+from sklearn.exceptions import NotFittedError
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_equal
 from sklearn.utils import _IS_32BIT
-from sklearn.utils._testing import assert_allclose, assert_array_equal
 from sklearn.utils.validation import check_random_state
+from sklearn.tree.tests.test_tree import assert_is_subtree
+

 # toy sample
 X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py
index 219df01f0b4f9..20d0c289a9a7d 100644
--- a/sklearn/inspection/tests/test_permutation_importance.py
+++ b/sklearn/inspection/tests/test_permutation_importance.py
@@ -1,23 +1,31 @@
-import numpy as np
 import pytest
+import numpy as np
+
 from numpy.testing import assert_allclose

 from sklearn.compose import ColumnTransformer
-from sklearn.datasets import (
-    load_diabetes,
-    load_iris,
-    make_classification,
-    make_regression,
-)
+from sklearn.datasets import load_diabetes
+from sklearn.datasets import load_iris
+from sklearn.datasets import make_classification
+from sklearn.datasets import make_regression
 from sklearn.dummy import DummyClassifier
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import LogisticRegression
 from sklearn.impute import SimpleImputer
 from sklearn.inspection import permutation_importance
-from sklearn.linear_model import LinearRegression, LogisticRegression
-from sklearn.metrics import get_scorer, mean_squared_error, r2_score
 from sklearn.model_selection import train_test_split
+from sklearn.metrics import (
+    get_scorer,
+    mean_squared_error,
+    r2_score,
+)
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
+from sklearn.preprocessing import KBinsDiscretizer
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import scale
 from sklearn.utils import parallel_backend
 from sklearn.utils._testing import _convert_container
diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py
index 48ce181b481cc..db19a52daf867 100644
--- a/sklearn/isotonic.py
+++ b/sklearn/isotonic.py
@@ -3,17 +3,17 @@
 # Nelle Varoquaux
 # License: BSD 3 clause

-import math
-import warnings
-
 import numpy as np
 from scipy import interpolate
 from scipy.stats import spearmanr
+import warnings
+import math

-from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique
-from .base import BaseEstimator, RegressorMixin, TransformerMixin
+from .base import BaseEstimator, TransformerMixin, RegressorMixin
 from .utils import check_array, check_consistent_length
 from .utils.validation import _check_sample_weight
+from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique
+

 __all__ = ["check_increasing", "isotonic_regression", "IsotonicRegression"]
diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py
index 46ce0fbcd3e29..1e4f4c6aa1301 100644
--- a/sklearn/kernel_approximation.py
+++ b/sklearn/kernel_approximation.py
@@ -19,15 +19,15 @@
 except ImportError:  # scipy < 1.4
     from scipy.fftpack import fft, ifft

-from .base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
-from .metrics.pairwise import KERNEL_PARAMS, pairwise_kernels
+from .base import BaseEstimator
+from .base import TransformerMixin
+from .base import _ClassNamePrefixFeaturesOutMixin
 from .utils import check_random_state
 from .utils.extmath import safe_sparse_dot
-from .utils.validation import (
-    _check_feature_names_in,
-    check_is_fitted,
-    check_non_negative,
-)
+from .utils.validation import check_is_fitted
+from .utils.validation import _check_feature_names_in
+from .metrics.pairwise import pairwise_kernels, KERNEL_PARAMS
+from .utils.validation import check_non_negative


 class PolynomialCountSketch(
diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py
index ba32c1c0365aa..cc83e114338be 100644
--- a/sklearn/kernel_ridge.py
+++ b/sklearn/kernel_ridge.py
@@ -6,10 +6,10 @@

 import numpy as np

-from .base import BaseEstimator, MultiOutputMixin, RegressorMixin
-from .linear_model._ridge import _solve_cholesky_kernel
+from .base import BaseEstimator, RegressorMixin, MultiOutputMixin
 from .metrics.pairwise import pairwise_kernels
-from .utils.validation import _check_sample_weight, check_is_fitted
+from .linear_model._ridge import _solve_cholesky_kernel
+from .utils.validation import check_is_fitted, _check_sample_weight


 class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):
diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py
index 45c99d4d36df1..d5a14756c41a9 100644
--- a/sklearn/linear_model/__init__.py
+++ b/sklearn/linear_model/__init__.py
@@ -7,44 +7,46 @@
 # complete documentation.
 from ._base import LinearRegression
-from ._bayes import ARDRegression, BayesianRidge
+from ._bayes import BayesianRidge, ARDRegression
+from ._least_angle import (
+    Lars,
+    LassoLars,
+    lars_path,
+    lars_path_gram,
+    LarsCV,
+    LassoLarsCV,
+    LassoLarsIC,
+)
 from ._coordinate_descent import (
-    ElasticNet,
-    ElasticNetCV,
     Lasso,
+    ElasticNet,
     LassoCV,
+    ElasticNetCV,
+    lasso_path,
+    enet_path,
+    MultiTaskLasso,
     MultiTaskElasticNet,
     MultiTaskElasticNetCV,
-    MultiTaskLasso,
     MultiTaskLassoCV,
-    enet_path,
-    lasso_path,
 )
-from ._glm import GammaRegressor, PoissonRegressor, TweedieRegressor
+from ._glm import PoissonRegressor, GammaRegressor, TweedieRegressor
 from ._huber import HuberRegressor
-from ._least_angle import (
-    Lars,
-    LarsCV,
-    LassoLars,
-    LassoLarsCV,
-    LassoLarsIC,
-    lars_path,
-    lars_path_gram,
-)
+from ._sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
+from ._stochastic_gradient import SGDClassifier, SGDRegressor, SGDOneClassSVM
+from ._ridge import Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV, ridge_regression
 from ._logistic import LogisticRegression, LogisticRegressionCV
 from ._omp import (
-    OrthogonalMatchingPursuit,
-    OrthogonalMatchingPursuitCV,
     orthogonal_mp,
     orthogonal_mp_gram,
+    OrthogonalMatchingPursuit,
+    OrthogonalMatchingPursuitCV,
 )
-from ._passive_aggressive import PassiveAggressiveClassifier, PassiveAggressiveRegressor
+from ._passive_aggressive import PassiveAggressiveClassifier
+from ._passive_aggressive import PassiveAggressiveRegressor
 from ._perceptron import Perceptron
+
 from ._quantile import QuantileRegressor
 from ._ransac import RANSACRegressor
-from ._ridge import Ridge, RidgeClassifier, RidgeClassifierCV, RidgeCV, ridge_regression
-from ._sgd_fast import Hinge, Huber, Log, ModifiedHuber, SquaredLoss
-from ._stochastic_gradient import SGDClassifier, SGDOneClassSVM, SGDRegressor
 from ._theil_sen import TheilSenRegressor

 __all__ = [
diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index d6458961a4314..5b23c346cbc5f 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -14,31 +14,31 @@
 # Maria Telenczuk
 # License: BSD 3 clause

+from abc import ABCMeta, abstractmethod
 import numbers
 import warnings
-from abc import ABCMeta, abstractmethod

 import numpy as np
 import scipy.sparse as sp
-from scipy import linalg, optimize, sparse
+from scipy import linalg
+from scipy import optimize
+from scipy import sparse
 from scipy.sparse.linalg import lsqr
 from scipy.special import expit
-
 from joblib import Parallel

-from ..base import BaseEstimator, ClassifierMixin, MultiOutputMixin, RegressorMixin
+from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin
 from ..preprocessing._data import _is_constant_feature
-from ..utils import check_array, check_random_state
-from ..utils._seq_dataset import (
-    ArrayDataset32,
-    ArrayDataset64,
-    CSRDataset32,
-    CSRDataset64,
-)
-from ..utils.extmath import _incremental_mean_and_var, safe_sparse_dot
+from ..utils import check_array
+from ..utils.validation import FLOAT_DTYPES
+from ..utils import check_random_state
+from ..utils.extmath import safe_sparse_dot
+from ..utils.extmath import _incremental_mean_and_var
+from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
+from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
+from ..utils._seq_dataset import ArrayDataset64, CSRDataset64
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..utils.fixes import delayed
-from ..utils.sparsefuncs import inplace_column_scale, mean_variance_axis
-from ..utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted

 # TODO: bayesian_ridge_regression and bayesian_regression_ard
 # should be squashed into its respective objects.
diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py
index 33a81e48aa3ce..e9a88d6e2a65b 100644
--- a/sklearn/linear_model/_bayes.py
+++ b/sklearn/linear_model/_bayes.py
@@ -6,15 +6,16 @@
 # License: BSD 3 clause

 from math import log
-
 import numpy as np
 from scipy import linalg
-from scipy.linalg import pinvh

+from ._base import LinearModel, _preprocess_data, _rescale_data
 from ..base import RegressorMixin
+from ._base import _deprecate_normalize
 from ..utils.extmath import fast_logdet
+from scipy.linalg import pinvh
 from ..utils.validation import _check_sample_weight
-from ._base import LinearModel, _deprecate_normalize, _preprocess_data, _rescale_data
+

 ###############################################################################
 # BayesianRidge regression
diff --git a/sklearn/linear_model/_cd_fast.pyx b/sklearn/linear_model/_cd_fast.pyx
index 19bfa0197da3f..4c605c2911740 100644
--- a/sklearn/linear_model/_cd_fast.pyx
+++ b/sklearn/linear_model/_cd_fast.pyx
@@ -6,33 +6,21 @@
 #
 # License: BSD 3 clause

-cimport numpy as cnp
 from libc.math cimport fabs
-
+cimport numpy as cnp
 import numpy as np
 import numpy.linalg as linalg

 from cpython cimport bool
 from cython cimport floating
-
 import warnings
-
 from ..exceptions import ConvergenceWarning

-from ..utils._cython_blas cimport (
-    ColMajor,
-    NoTrans,
-    RowMajor,
-    Trans,
-    _asum,
-    _axpy,
-    _copy,
-    _dot,
-    _gemv,
-    _ger,
-    _nrm2,
-    _scal,
-)
+from ..utils._cython_blas cimport (_axpy, _dot, _asum, _ger, _gemv, _nrm2,
+                                   _copy, _scal)
+from ..utils._cython_blas cimport RowMajor, ColMajor, Trans, NoTrans
+
+
 from ..utils._random cimport our_rand_r

 ctypedef cnp.float64_t DOUBLE
diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index 2a9628943e466..779af3626e18d 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -5,33 +5,34 @@
 #
 # License: BSD 3 clause

-import numbers
 import sys
 import warnings
+import numbers
 from abc import ABC, abstractmethod
 from functools import partial

 import numpy as np
 from scipy import sparse
-
 from joblib import Parallel, effective_n_jobs

-from ..base import MultiOutputMixin, RegressorMixin
+from ._base import LinearModel, _pre_fit
+from ..base import RegressorMixin, MultiOutputMixin
+from ._base import _preprocess_data, _deprecate_normalize
+from ..utils import check_array
+from ..utils import check_scalar
+from ..utils.validation import check_random_state
 from ..model_selection import check_cv
-from ..utils import check_array, check_scalar
 from ..utils.extmath import safe_sparse_dot
-from ..utils.fixes import delayed
 from ..utils.validation import (
     _check_sample_weight,
     check_consistent_length,
     check_is_fitted,
-    check_random_state,
     column_or_1d,
 )
+from ..utils.fixes import delayed

 # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast'
 from . import _cd_fast as cd_fast  # type: ignore
-from ._base import LinearModel, _deprecate_normalize, _pre_fit, _preprocess_data


 def _set_order(X, y, order="C"):
diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py
index 1b82bbd77bcf9..fea9c4d4cf6ba 100644
--- a/sklearn/linear_model/_glm/__init__.py
+++ b/sklearn/linear_model/_glm/__init__.py
@@ -1,10 +1,10 @@
 # License: BSD 3 clause

 from .glm import (
-    GammaRegressor,
+    _GeneralizedLinearRegressor,
     PoissonRegressor,
+    GammaRegressor,
     TweedieRegressor,
-    _GeneralizedLinearRegressor,
 )

 __all__ = [
diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py
index 8f05fca1d1361..d337eaa7a4a18 100644
--- a/sklearn/linear_model/_glm/glm.py
+++ b/sklearn/linear_model/_glm/glm.py
@@ -20,10 +20,10 @@
     HalfTweedieLossIdentity,
 )
 from ...base import BaseEstimator, RegressorMixin
-from ...utils import check_array, check_scalar, deprecated
-from ...utils._openmp_helpers import _openmp_effective_n_threads
 from ...utils.optimize import _check_optimize_result
-from ...utils.validation import _check_sample_weight, check_is_fitted
+from ...utils import check_scalar, check_array, deprecated
+from ...utils.validation import check_is_fitted, _check_sample_weight
+from ...utils._openmp_helpers import _openmp_effective_n_threads
 from .._linear_loss import LinearModelLoss
diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py
index e3817037f9dcb..d29fde2eb30d7 100644
--- a/sklearn/linear_model/_glm/tests/test_glm.py
+++ b/sklearn/linear_model/_glm/tests/test_glm.py
@@ -3,24 +3,20 @@
 # License: BSD 3 clause

 import re
-import warnings
-
 import numpy as np
-import pytest
 from numpy.testing import assert_allclose
+import pytest
+import warnings

+from sklearn.base import clone
 from sklearn._loss.glm_distribution import TweedieDistribution
 from sklearn._loss.link import IdentityLink, LogLink
-from sklearn.base import clone
+
 from sklearn.datasets import make_regression
-from sklearn.exceptions import ConvergenceWarning
-from sklearn.linear_model import (
-    GammaRegressor,
-    PoissonRegressor,
-    Ridge,
-    TweedieRegressor,
-)
 from sklearn.linear_model._glm import _GeneralizedLinearRegressor
+from sklearn.linear_model import TweedieRegressor, PoissonRegressor, GammaRegressor
+from sklearn.linear_model import Ridge
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.metrics import d2_tweedie_score
 from sklearn.model_selection import train_test_split
diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py
index d72346faed2e4..3fdf5aa73743f 100644
--- a/sklearn/linear_model/_huber.py
+++ b/sklearn/linear_model/_huber.py
@@ -2,14 +2,15 @@
 # License: BSD 3 clause

 import numpy as np
+
 from scipy import optimize

 from ..base import BaseEstimator, RegressorMixin
+from ._base import LinearModel
 from ..utils import axis0_safe_slice
+from ..utils.validation import _check_sample_weight
 from ..utils.extmath import safe_sparse_dot
 from ..utils.optimize import _check_optimize_result
-from ..utils.validation import _check_sample_weight
-from ._base import LinearModel


 def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py
index 08a3ddb01ceac..22284cd71d0ff 100644
--- a/sklearn/linear_model/_least_angle.py
+++ b/sklearn/linear_model/_least_angle.py
@@ -8,24 +8,25 @@
 #
 # License: BSD 3 clause

+from math import log
 import sys
 import warnings
-from math 
 import numpy as np
-from scipy import interpolate, linalg
+from scipy import linalg, interpolate
 from scipy.linalg.lapack import get_lapack_funcs
-
 from joblib import Parallel
 
-from ..base import MultiOutputMixin, RegressorMixin
-from ..exceptions import ConvergenceWarning
-from ..model_selection import check_cv
+from ._base import LinearModel, LinearRegression
+from ._base import _deprecate_normalize, _preprocess_data
+from ..base import RegressorMixin, MultiOutputMixin
 
 # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs'
-from ..utils import arrayfuncs, as_float_array, check_random_state  # type: ignore
+from ..utils import arrayfuncs, as_float_array  # type: ignore
+from ..utils import check_random_state
+from ..model_selection import check_cv
+from ..exceptions import ConvergenceWarning
 from ..utils.fixes import delayed
-from ._base import LinearModel, LinearRegression, _deprecate_normalize, _preprocess_data
 
 SOLVE_TRIANGULAR_ARGS = {"check_finite": False}
diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py
index 0ed2e148fecdc..64a99325dcd7a 100644
--- a/sklearn/linear_model/_linear_loss.py
+++ b/sklearn/linear_model/_linear_loss.py
@@ -3,7 +3,6 @@
 """
 import numpy as np
 from scipy import sparse
-
 from ..utils.extmath import squared_norm
 
 
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index 2df456b0af5f3..72b602e409801 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -15,28 +15,25 @@
 
 import numpy as np
 from scipy import optimize
-
 from joblib import Parallel, effective_n_jobs
 
+from ._base import LinearClassifierMixin, SparseCoefMixin, BaseEstimator
+from ._linear_loss import LinearModelLoss
+from ._sag import sag_solver
 from .._loss.loss import HalfBinomialLoss, HalfMultinomialLoss
-from ..metrics import get_scorer
-from ..model_selection import check_cv
-from ..preprocessing import LabelBinarizer, LabelEncoder
+from ..preprocessing import LabelEncoder, LabelBinarizer
 from ..svm._base import _fit_liblinear
-from ..utils import (
-    check_array,
-    check_consistent_length,
-    check_random_state,
-    compute_class_weight,
-)
-from ..utils.extmath import row_norms, softmax
-from ..utils.fixes import delayed
+from ..utils import check_array, check_consistent_length, compute_class_weight
+from ..utils import check_random_state
+from ..utils.extmath import softmax
+from ..utils.extmath import row_norms
+from ..utils.optimize import _newton_cg, _check_optimize_result
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..utils.multiclass import check_classification_targets
-from ..utils.optimize import _check_optimize_result, _newton_cg
-from ..utils.validation import _check_sample_weight, check_is_fitted
-from ._base import BaseEstimator, LinearClassifierMixin, SparseCoefMixin
-from ._linear_loss import LinearModelLoss
-from ._sag import sag_solver
+from ..utils.fixes import delayed
+from ..model_selection import check_cv
+from ..metrics import get_scorer
+
 
 _LOGISTIC_SOLVER_CONVERGENCE_MSG = (
     "Please also refer to the documentation for alternative solver options:\n"
@@ -875,7 +872,8 @@ class LogisticRegression(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
 
         .. seealso::
            Refer to the User Guide for more information regarding
            :class:`LogisticRegression` and more specifically the
-           :ref:`Table ` summarazing solver/penalty supports.
+           `Table `_
+           summarizing solver/penalty supports.
         .. versionadded:: 0.17
            Stochastic Average Gradient descent solver.
diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py
index d70d309ab157f..b86c35c41de85 100644
--- a/sklearn/linear_model/_omp.py
+++ b/sklearn/linear_model/_omp.py
@@ -11,14 +11,13 @@
 import numpy as np
 from scipy import linalg
 from scipy.linalg.lapack import get_lapack_funcs
-
 from joblib import Parallel
 
-from ..base import MultiOutputMixin, RegressorMixin
-from ..model_selection import check_cv
+from ._base import LinearModel, _pre_fit, _deprecate_normalize
+from ..base import RegressorMixin, MultiOutputMixin
 from ..utils import as_float_array, check_array
 from ..utils.fixes import delayed
-from ._base import LinearModel, _deprecate_normalize, _pre_fit
+from ..model_selection import check_cv
 
 premature = (
     "Orthogonal matching pursuit ended prematurely due to linear"
diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py
index 5c010d9069599..65f754ba35f55 100644
--- a/sklearn/linear_model/_passive_aggressive.py
+++ b/sklearn/linear_model/_passive_aggressive.py
@@ -1,7 +1,9 @@
 # Authors: Rob Zinkov, Mathieu Blondel
 # License: BSD 3 clause
 
-from ._stochastic_gradient import DEFAULT_EPSILON, BaseSGDClassifier, BaseSGDRegressor
+from ._stochastic_gradient import BaseSGDClassifier
+from ._stochastic_gradient import BaseSGDRegressor
+from ._stochastic_gradient import DEFAULT_EPSILON
 
 
 class PassiveAggressiveClassifier(BaseSGDClassifier):
diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py
index 628f7b46c81b3..bc5d59e6fca0c 100644
--- a/sklearn/linear_model/_quantile.py
+++ b/sklearn/linear_model/_quantile.py
@@ -8,11 +8,11 @@
 from scipy.optimize import linprog
 
 from ..base import BaseEstimator, RegressorMixin
+from ._base import LinearModel
 from ..exceptions import ConvergenceWarning
 from ..utils import _safe_indexing
-from ..utils.fixes import parse_version, sp_version
 from ..utils.validation import _check_sample_weight
-from ._base import LinearModel
+from ..utils.fixes import sp_version, parse_version
 
 
 class QuantileRegressor(LinearModel, RegressorMixin, BaseEstimator):
diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index 4d51551e7b667..8d20005430769 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -2,22 +2,17 @@
 #
 # License: BSD 3 clause
 
-import warnings
-
 import numpy as np
+import warnings
 
-from ..base import (
-    BaseEstimator,
-    MetaEstimatorMixin,
-    MultiOutputMixin,
-    RegressorMixin,
-    clone,
-)
-from ..exceptions import ConvergenceWarning
-from ..utils import check_consistent_length, check_random_state
+from ..base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone
+from ..base import MultiOutputMixin
+from ..utils import check_random_state, check_consistent_length
 from ..utils.random import sample_without_replacement
-from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from ._base import LinearRegression
+from ..utils.validation import has_fit_parameter
+from ..exceptions import ConvergenceWarning
 
 _EPSILON = np.spacing(1)
 
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 2314e88c3bb7a..dee703b73c059 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -9,38 +9,35 @@
 # License: BSD 3 clause
 
-import numbers
-import warnings
 from abc import ABCMeta, abstractmethod
 from functools import partial
+import warnings
 
 import numpy as np
-from scipy import linalg, optimize, sparse
+import numbers
+from scipy import linalg
+from scipy import sparse
+from scipy import optimize
 from scipy.sparse import linalg as sp_linalg
 
+from ._base import LinearClassifierMixin, LinearModel
+from ._base import _deprecate_normalize, _preprocess_data, _rescale_data
+from ._sag import sag_solver
 from ..base import MultiOutputMixin, RegressorMixin, is_classifier
-from ..exceptions import ConvergenceWarning
-from ..metrics import check_scoring
-from ..model_selection import GridSearchCV
+from ..utils.extmath import safe_sparse_dot
+from ..utils.extmath import row_norms
+from ..utils import check_array
+from ..utils import check_consistent_length
+from ..utils import check_scalar
+from ..utils import compute_sample_weight
+from ..utils import column_or_1d
+from ..utils.validation import check_is_fitted
+from ..utils.validation import _check_sample_weight
 from ..preprocessing import LabelBinarizer
-from ..utils import (
-    check_array,
-    check_consistent_length,
-    check_scalar,
-    column_or_1d,
-    compute_sample_weight,
-)
-from ..utils.extmath import row_norms, safe_sparse_dot
+from ..model_selection import GridSearchCV
+from ..metrics import check_scoring
+from ..exceptions import ConvergenceWarning
 from ..utils.sparsefuncs import mean_variance_axis
-from ..utils.validation import _check_sample_weight, check_is_fitted
-from ._base import (
-    LinearClassifierMixin,
-    LinearModel,
-    _deprecate_normalize,
-    _preprocess_data,
-    _rescale_data,
-)
-from ._sag import sag_solver
 
 
 def _get_rescaled_operator(X, X_offset, sample_weight_sqrt):
diff --git a/sklearn/linear_model/_sag.py b/sklearn/linear_model/_sag.py
index 2626955ec2a7f..b7860edd43031 100644
--- a/sklearn/linear_model/_sag.py
+++ b/sklearn/linear_model/_sag.py
@@ -8,12 +8,12 @@
 
 import numpy as np
 
+from ._base import make_dataset
+from ._sag_fast import sag32, sag64
 from ..exceptions import ConvergenceWarning
 from ..utils import check_array
-from ..utils.extmath import row_norms
 from ..utils.validation import _check_sample_weight
-from ._base import make_dataset
-from ._sag_fast import sag32, sag64
+from ..utils.extmath import row_norms
 
 
 def get_auto_step_size(
diff --git a/sklearn/linear_model/_sgd_fast.pyx b/sklearn/linear_model/_sgd_fast.pyx
index 3e76d4c2c60b1..bcb1d05d3a8be 100644
--- a/sklearn/linear_model/_sgd_fast.pyx
+++ b/sklearn/linear_model/_sgd_fast.pyx
@@ -6,21 +6,18 @@
 # License: BSD 3 clause
 
+import numpy as np
 import sys
 from time import time
 
-import numpy as np
-
+from libc.math cimport exp, log, sqrt, pow, fabs
 cimport numpy as cnp
-from libc.math cimport exp, fabs, log, pow, sqrt
 from numpy.math cimport INFINITY
-
-
 cdef extern from "_sgd_fast_helpers.h":
     bint skl_isfinite(double) nogil
 
-from ..utils._seq_dataset cimport SequentialDataset64 as SequentialDataset
 from ..utils._weight_vector cimport WeightVector64 as WeightVector
+from ..utils._seq_dataset cimport SequentialDataset64 as SequentialDataset
 
 cnp.import_array()
 
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 59babd074e9b2..a4c129d101ef1 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -6,34 +6,36 @@
 Descent (SGD).
 """
+import numpy as np
 import warnings
-from abc import ABCMeta, abstractmethod
 
-import numpy as np
+from abc import ABCMeta, abstractmethod
 
 from joblib import Parallel
 
-from ..base import BaseEstimator, OutlierMixin, RegressorMixin, clone, is_classifier
-from ..exceptions import ConvergenceWarning
-from ..model_selection import ShuffleSplit, StratifiedShuffleSplit
-from ..utils import check_random_state, compute_class_weight
-from ..utils.extmath import safe_sparse_dot
-from ..utils.fixes import delayed
+from ..base import clone, is_classifier
+from ._base import LinearClassifierMixin, SparseCoefMixin
+from ._base import make_dataset
+from ..base import BaseEstimator, RegressorMixin, OutlierMixin
+from ..utils import check_random_state
 from ..utils.metaestimators import available_if
+from ..utils.extmath import safe_sparse_dot
 from ..utils.multiclass import _check_partial_fit_first_call
-from ..utils.validation import _check_sample_weight, check_is_fitted
-from ._base import LinearClassifierMixin, SparseCoefMixin, make_dataset
-from ._sgd_fast import (
-    EpsilonInsensitive,
-    Hinge,
-    Huber,
-    Log,
-    ModifiedHuber,
-    SquaredEpsilonInsensitive,
-    SquaredHinge,
-    SquaredLoss,
-    _plain_sgd,
-)
+from ..utils.validation import check_is_fitted, _check_sample_weight
+from ..utils.fixes import delayed
+from ..exceptions import ConvergenceWarning
+from ..model_selection import StratifiedShuffleSplit, ShuffleSplit
+
+from ._sgd_fast import _plain_sgd
+from ..utils import compute_class_weight
+from ._sgd_fast import Hinge
+from ._sgd_fast import SquaredHinge
+from ._sgd_fast import Log
+from ._sgd_fast import ModifiedHuber
+from ._sgd_fast import SquaredLoss
+from ._sgd_fast import Huber
+from ._sgd_fast import EpsilonInsensitive
+from ._sgd_fast import SquaredEpsilonInsensitive
 
 LEARNING_RATE_TYPES = {
     "constant": 1,
diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py
index 51f341f932ee7..b51c5d8695e01 100644
--- a/sklearn/linear_model/_theil_sen.py
+++ b/sklearn/linear_model/_theil_sen.py
@@ -7,23 +7,22 @@
 # License: BSD 3 clause
 
-import numbers
 import warnings
+import numbers
 from itertools import combinations
 
 import numpy as np
 from scipy import linalg
-from scipy.linalg.lapack import get_lapack_funcs
 from scipy.special import binom
-
+from scipy.linalg.lapack import get_lapack_funcs
 from joblib import Parallel, effective_n_jobs
 
+from ._base import LinearModel
 from ..base import RegressorMixin
-from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state
-from ..utils.fixes import delayed
 from ..utils.validation import check_scalar
-from ._base import LinearModel
+from ..utils.fixes import delayed
+from ..exceptions import ConvergenceWarning
 
 _EPSILON = np.finfo(np.double).eps
 
diff --git a/sklearn/linear_model/setup.py b/sklearn/linear_model/setup.py
index ce02400af7736..74d7d9e2b05ea 100644
--- a/sklearn/linear_model/setup.py
+++ b/sklearn/linear_model/setup.py
@@ -1,5 +1,4 @@
 import os
-
 import numpy
 
 from sklearn._build_utils import gen_from_templates
diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
index 301c17ccc63b5..26433109b334d 100644
--- a/sklearn/linear_model/tests/test_base.py
+++ b/sklearn/linear_model/tests/test_base.py
@@ -4,27 +4,28 @@
 #
 # License: BSD 3 clause
 
+import pytest
 import warnings
 
 import numpy as np
-import pytest
-from scipy import linalg, sparse
+from scipy import sparse
+from scipy import linalg
 
-from sklearn.datasets import load_iris, make_regression, make_sparse_uncorrelated
-from sklearn.linear_model import LinearRegression
-from sklearn.linear_model._base import (
-    _deprecate_normalize,
-    _preprocess_data,
-    _rescale_data,
-    make_dataset,
-)
-from sklearn.preprocessing import StandardScaler, add_dummy_feature
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model._base import _deprecate_normalize
+from sklearn.linear_model._base import _preprocess_data
+from sklearn.linear_model._base import _rescale_data
+from sklearn.linear_model._base import make_dataset
+from sklearn.datasets import make_sparse_uncorrelated
+from sklearn.datasets import make_regression
+from sklearn.datasets import load_iris
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import add_dummy_feature
 
 rng = np.random.RandomState(0)
 rtol = 1e-6
diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py
index 02273717cd968..4044aefc3e446 100644
--- a/sklearn/linear_model/tests/test_bayes.py
+++ b/sklearn/linear_model/tests/test_bayes.py
@@ -8,14 +8,14 @@
 import numpy as np
 import pytest
 
-from sklearn import datasets
-from sklearn.linear_model import ARDRegression, BayesianRidge, Ridge
+
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_less
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_less,
-)
+from sklearn.linear_model import BayesianRidge, ARDRegression
+from sklearn.linear_model import Ridge
+from sklearn import datasets
 from sklearn.utils.extmath import fast_logdet
 
 diabetes = datasets.load_diabetes()
diff --git a/sklearn/linear_model/tests/test_common.py b/sklearn/linear_model/tests/test_common.py
index 06dee1a00706d..49e506227ccfa 100644
--- a/sklearn/linear_model/tests/test_common.py
+++ b/sklearn/linear_model/tests/test_common.py
@@ -2,24 +2,23 @@
 #
 # License: BSD 3 clause
 
+import pytest
+
 import sys
 import warnings
-
 import numpy as np
-import pytest
 
 from sklearn.base import is_classifier
-from sklearn.linear_model import (
-    ARDRegression,
-    BayesianRidge,
-    LinearRegression,
-    Ridge,
-    RidgeClassifier,
-    RidgeClassifierCV,
-    RidgeCV,
-)
-from sklearn.utils import check_random_state
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Ridge
+from sklearn.linear_model import RidgeCV
+from sklearn.linear_model import RidgeClassifier
+from sklearn.linear_model import RidgeClassifierCV
+from sklearn.linear_model import BayesianRidge
+from sklearn.linear_model import ARDRegression
+
 from sklearn.utils.fixes import np_version, parse_version
+from sklearn.utils import check_random_state
 
 
 @pytest.mark.parametrize(
diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py
index 2b93931a63d9a..e5d7ba358c1f5 100644
--- a/sklearn/linear_model/tests/test_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_coordinate_descent.py
@@ -2,28 +2,51 @@
 # Alexandre Gramfort
 # License: BSD 3 clause
 
-import warnings
-from copy import deepcopy
-
 import numpy as np
 import pytest
+import warnings
 from scipy import interpolate, sparse
-
+from copy import deepcopy
 import joblib
 
-from sklearn.base import clone, is_classifier
-from sklearn.datasets import load_diabetes, make_regression
+
+from sklearn.base import is_classifier
+from sklearn.base import clone
+from sklearn.datasets import load_diabetes
+from sklearn.datasets import make_regression
+from sklearn.model_selection import (
+    GridSearchCV,
+    LeaveOneGroupOut,
+    train_test_split,
+)
+from sklearn.pipeline import make_pipeline
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
 from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils._testing import _convert_container
+
+from sklearn.utils._testing import TempMemmap
+from sklearn.utils import check_random_state
+from sklearn.utils.sparsefuncs import mean_variance_axis
+
 from sklearn.linear_model import (
     ARDRegression,
     BayesianRidge,
     ElasticNet,
     ElasticNetCV,
+    enet_path,
     Lars,
+    lars_path,
     Lasso,
     LassoCV,
     LassoLars,
     LassoLarsCV,
     LassoLarsIC,
+    lasso_path,
     LinearRegression,
     MultiTaskElasticNet,
     MultiTaskElasticNetCV,
@@ -34,25 +57,11 @@
     RidgeClassifier,
     RidgeClassifierCV,
     RidgeCV,
-    enet_path,
-    lars_path,
-    lasso_path,
 )
+
 from sklearn.linear_model._coordinate_descent import _set_order
-from sklearn.model_selection import GridSearchCV, LeaveOneGroupOut, train_test_split
-from sklearn.pipeline import Pipeline, make_pipeline
-from sklearn.preprocessing import StandardScaler
-from sklearn.utils import check_array, check_random_state
-from sklearn.utils._testing import (
-    TempMemmap,
-    _convert_container,
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
-from sklearn.utils.sparsefuncs import mean_variance_axis
+from sklearn.utils import check_array
+
 
 # FIXME: 'normalize' to be removed in 1.2
 filterwarnings_normalize = pytest.mark.filterwarnings(
@@ -350,8 +359,8 @@ def test_lasso_cv():
 
 
 def test_lasso_cv_with_some_model_selection():
-    from sklearn import datasets
     from sklearn.model_selection import ShuffleSplit
+    from sklearn import datasets
 
     diabetes = datasets.load_diabetes()
     X = diabetes.data
diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py
index d2552d1b990fd..88a5d096772b3 100644
--- a/sklearn/linear_model/tests/test_huber.py
+++ b/sklearn/linear_model/tests/test_huber.py
@@ -4,14 +4,13 @@
 
 import numpy as np
 from scipy import optimize, sparse
 
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+
 from sklearn.datasets import make_regression
-from sklearn.linear_model import HuberRegressor, LinearRegression, Ridge, SGDRegressor
+from sklearn.linear_model import HuberRegressor, LinearRegression, SGDRegressor, Ridge
 from sklearn.linear_model._huber import _huber_loss_and_gradient
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
 
 
 def make_regression_with_outliers(n_samples=50, n_features=20):
diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py
index a7db721f645e4..db5a3dfd114b9 100644
--- a/sklearn/linear_model/tests/test_least_angle.py
+++ b/sklearn/linear_model/tests/test_least_angle.py
@@ -3,29 +3,20 @@
 import numpy as np
 import pytest
 from scipy import linalg
-
-from sklearn import datasets, linear_model
 from sklearn.base import clone
-from sklearn.exceptions import ConvergenceWarning
-from sklearn.linear_model import (
-    Lars,
-    LarsCV,
-    LassoLars,
-    LassoLarsCV,
-    LassoLarsIC,
-    lars_path,
-)
-from sklearn.linear_model._least_angle import _lars_path_residues
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils._testing import TempMemmap
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    TempMemmap,
-    assert_allclose,
-    assert_array_almost_equal,
-    ignore_warnings,
-)
+from sklearn.exceptions import ConvergenceWarning
+from sklearn import linear_model, datasets
+from sklearn.linear_model._least_angle import _lars_path_residues
+from sklearn.linear_model import LassoLarsIC, lars_path
+from sklearn.linear_model import Lars, LassoLars, LarsCV, LassoLarsCV
 
 # TODO: use another dataset that has multiple drops
 diabetes = datasets.load_diabetes()
@@ -68,8 +59,8 @@ def test_simple():
     # Principle of Lars is to keep covariances tied and decreasing
     # also test verbose output
-    import sys
     from io import StringIO
+    import sys
 
     old_stdout = sys.stdout
     try:
diff --git a/sklearn/linear_model/tests/test_linear_loss.py b/sklearn/linear_model/tests/test_linear_loss.py
index bfe874389ffc8..d4e20ad69ca8a 100644
--- a/sklearn/linear_model/tests/test_linear_loss.py
+++ b/sklearn/linear_model/tests/test_linear_loss.py
@@ -4,16 +4,21 @@
 Note that correctness of losses (which compose LinearModelLoss) is already
 well covered in the _loss module.
 """
-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose
 from scipy import linalg, optimize, sparse
 
-from sklearn._loss.loss import HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss
+from sklearn._loss.loss import (
+    HalfBinomialLoss,
+    HalfMultinomialLoss,
+    HalfPoissonLoss,
+)
 from sklearn.datasets import make_low_rank_matrix
 from sklearn.linear_model._linear_loss import LinearModelLoss
 from sklearn.utils.extmath import squared_norm
 
+
 # We do not need to test all losses, just what LinearModelLoss does on top of the
 # base losses.
 LOSSES = [HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss]
diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py
index 2b7f2a327d7b1..5bb2b83094290 100644
--- a/sklearn/linear_model/tests/test_logistic.py
+++ b/sklearn/linear_model/tests/test_logistic.py
@@ -1,37 +1,36 @@
 import itertools
 import os
 import re
-
 import numpy as np
-import pytest
-from numpy.testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+from numpy.testing import assert_allclose, assert_almost_equal
+from numpy.testing import assert_array_almost_equal, assert_array_equal
 from scipy import sparse
+import pytest
+
 from sklearn.base import clone
 from sklearn.datasets import load_iris, make_classification
-from sklearn.exceptions import ConvergenceWarning
+from sklearn.metrics import log_loss
+from sklearn.metrics import get_scorer
+from sklearn.model_selection import StratifiedKFold
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import train_test_split
+from sklearn.model_selection import cross_val_score
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sklearn.utils import compute_class_weight, _IS_32BIT
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils import shuffle
 from sklearn.linear_model import SGDClassifier
+from sklearn.preprocessing import scale
+from sklearn.utils._testing import skip_if_no_parallel
+
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model._logistic import (
-    LogisticRegression,
-    LogisticRegressionCV,
     _log_reg_scoring_path,
     _logistic_regression_path,
+    LogisticRegression,
+    LogisticRegressionCV,
 )
-from sklearn.metrics import get_scorer, log_loss
-from sklearn.model_selection import (
-    GridSearchCV,
-    StratifiedKFold,
-    cross_val_score,
-    train_test_split,
-)
-from sklearn.preprocessing import LabelEncoder, StandardScaler, scale
-from sklearn.utils import _IS_32BIT, compute_class_weight, shuffle
-from sklearn.utils._testing import ignore_warnings, skip_if_no_parallel
 
 X = [[-1, 0], [0, 1], [1, 1]]
 X_sp = sparse.csr_matrix(X)
diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py
index e1144dca5302e..1a9a0a8b40c82 100644
--- a/sklearn/linear_model/tests/test_omp.py
+++ b/sklearn/linear_model/tests/test_omp.py
@@ -1,26 +1,25 @@
 # Author: Vlad Niculae
 # License: BSD 3 clause
 
-import warnings
-
 import numpy as np
 import pytest
+import warnings
+
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import ignore_warnings
+
 
-from sklearn.datasets import make_sparse_coded_signal
 from sklearn.linear_model import (
-    LinearRegression,
-    OrthogonalMatchingPursuit,
-    OrthogonalMatchingPursuitCV,
     orthogonal_mp,
     orthogonal_mp_gram,
+    OrthogonalMatchingPursuit,
+    OrthogonalMatchingPursuitCV,
+    LinearRegression,
 )
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
+from sklearn.datasets import make_sparse_coded_signal
 
 n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
 y, X, gamma = make_sparse_coded_signal(
diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py
index c6555895ef6cc..3ff92bd69a43b 100644
--- a/sklearn/linear_model/tests/test_passive_aggressive.py
+++ b/sklearn/linear_model/tests/test_passive_aggressive.py
@@ -1,16 +1,17 @@
 import numpy as np
-import pytest
 import scipy.sparse as sp
-from sklearn.base import ClassifierMixin, is_classifier
-from sklearn.datasets import load_iris
-from sklearn.linear_model import PassiveAggressiveClassifier, PassiveAggressiveRegressor
+import pytest
+
+from sklearn.base import is_classifier
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.base import ClassifierMixin
 from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+from sklearn.datasets import load_iris
+from sklearn.linear_model import PassiveAggressiveClassifier
+from sklearn.linear_model import PassiveAggressiveRegressor
 
 iris = load_iris()
 random_state = check_random_state(12)
diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py
index e2c947a887bde..4c4f092c69d71 100644
--- a/sklearn/linear_model/tests/test_perceptron.py
+++ b/sklearn/linear_model/tests/test_perceptron.py
@@ -1,11 +1,12 @@
 import numpy as np
-import pytest
 import scipy.sparse as sp
+import pytest
 
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils import check_random_state
 from sklearn.datasets import load_iris
 from sklearn.linear_model import Perceptron
-from sklearn.utils import check_random_state
-from sklearn.utils._testing import assert_allclose, assert_array_almost_equal
 
 iris = load_iris()
 random_state = check_random_state(12)
diff --git a/sklearn/linear_model/tests/test_quantile.py b/sklearn/linear_model/tests/test_quantile.py
index a61f424a3cbbe..4c22c46aff463 100644
--- a/sklearn/linear_model/tests/test_quantile.py
+++ b/sklearn/linear_model/tests/test_quantile.py
@@ -5,14 +5,15 @@
 import numpy as np
 import pytest
 from pytest import approx
-from scipy import sparse
 from scipy.optimize import minimize
+from scipy import sparse
 
 from sklearn.datasets import make_regression
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import HuberRegressor, QuantileRegressor
 from sklearn.metrics import mean_pinball_loss
-from sklearn.utils._testing import assert_allclose, skip_if_32bit
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import skip_if_32bit
 from sklearn.utils.fixes import parse_version, sp_version
 
 
diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py
index 62731cad5e767..53f6b2d1f75eb 100644
--- a/sklearn/linear_model/tests/test_ransac.py
+++ b/sklearn/linear_model/tests/test_ransac.py
@@ -1,19 +1,18 @@
 import numpy as np
 import pytest
-from numpy.testing import assert_array_almost_equal, assert_array_equal
 from scipy import sparse
 
-from sklearn.datasets import make_regression
-from sklearn.exceptions import ConvergenceWarning
-from sklearn.linear_model import (
-    LinearRegression,
-    OrthogonalMatchingPursuit,
-    RANSACRegressor,
-    Ridge,
-)
-from sklearn.linear_model._ransac import _dynamic_max_trials
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_array_equal
+
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import assert_allclose
+from sklearn.datasets import make_regression
+from sklearn.linear_model import LinearRegression, RANSACRegressor, Ridge
+from sklearn.linear_model import OrthogonalMatchingPursuit
+from sklearn.linear_model._ransac import _dynamic_max_trials
+from sklearn.exceptions import ConvergenceWarning
+
 
 # Generate coordinates of line
 X = np.arange(-200, 200)
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index eb7006c9ed91e..1f05d821efed4 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -1,54 +1,53 @@
-import warnings
-from itertools import product
-
 import numpy as np
-import pytest
 import scipy.sparse as sp
 from scipy import linalg
+from itertools import product
+
+import pytest
+import warnings
+
+from sklearn.utils import _IS_32BIT
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.estimator_checks import check_sample_weights_invariance
 
-from sklearn import datasets
-from sklearn.datasets import (
-    make_classification,
-    make_low_rank_matrix,
-    make_multilabel_classification,
-    make_regression,
-)
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.linear_model import (
-    LinearRegression,
-    Ridge,
-    RidgeClassifier,
-    RidgeClassifierCV,
-    RidgeCV,
-    ridge_regression,
-)
-from sklearn.linear_model._ridge import (
-    _check_gcv_mode,
-    _RidgeGCV,
-    _solve_cholesky,
-    _solve_cholesky_kernel,
-    _solve_lbfgs,
-    _solve_svd,
-    _X_CenterStackOp,
-)
-from sklearn.metrics import get_scorer, make_scorer, mean_squared_error
-from sklearn.model_selection import (
-    GridSearchCV,
-    GroupKFold,
-    KFold,
-    LeaveOneOut,
-    cross_val_predict,
-)
+
+from sklearn import datasets
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import make_scorer
+from sklearn.metrics import get_scorer
+
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import ridge_regression
+from sklearn.linear_model import Ridge
+from sklearn.linear_model._ridge import _RidgeGCV
+from sklearn.linear_model import RidgeCV
+from sklearn.linear_model import RidgeClassifier
+from sklearn.linear_model import RidgeClassifierCV
+from sklearn.linear_model._ridge import _solve_cholesky
+from sklearn.linear_model._ridge import _solve_cholesky_kernel
+from sklearn.linear_model._ridge import _solve_svd
+from sklearn.linear_model._ridge import _solve_lbfgs
+from sklearn.linear_model._ridge import _check_gcv_mode
+from sklearn.linear_model._ridge import _X_CenterStackOp
+from sklearn.datasets import make_low_rank_matrix
+from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification
+from sklearn.datasets import make_multilabel_classification
+
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import KFold
+from sklearn.model_selection import GroupKFold
+from sklearn.model_selection import cross_val_predict
+from sklearn.model_selection import LeaveOneOut
+
 from sklearn.preprocessing import minmax_scale
-from sklearn.utils import _IS_32BIT, check_random_state
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
-from sklearn.utils.estimator_checks import check_sample_weights_invariance
+from sklearn.utils import check_random_state
+
 
 SOLVERS = ("svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga")
 SPARSE_SOLVERS_WITH_INTERCEPT = ("sparse_cg", "sag")
diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py
index d49da2b1e1e5b..d3a27c4088ab7 100644
--- a/sklearn/linear_model/tests/test_sag.py
+++ b/sklearn/linear_model/tests/test_sag.py
@@ -5,28 +5,27 @@
 
 import math
 import re
-
-import numpy as np
 import pytest
+import numpy as np
 import scipy.sparse as sp
 from scipy.special import logsumexp
 
 from sklearn._loss.loss import HalfMultinomialLoss
-from sklearn.base import clone
-from sklearn.datasets import load_iris, make_blobs, make_classification
-from sklearn.linear_model import LogisticRegression, Ridge
-from sklearn.linear_model._base import make_dataset
 from sklearn.linear_model._linear_loss import LinearModelLoss
 from sklearn.linear_model._sag import get_auto_step_size
 from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples
-from sklearn.preprocessing import LabelBinarizer, LabelEncoder
-from sklearn.utils import check_random_state, compute_class_weight
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-)
+from sklearn.linear_model import LogisticRegression, Ridge
+from sklearn.linear_model._base import make_dataset
+
 from sklearn.utils.extmath import row_norms
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils import compute_class_weight
+from sklearn.utils import check_random_state
+from sklearn.preprocessing import LabelEncoder, LabelBinarizer
+from sklearn.datasets import make_blobs, load_iris, make_classification
+from sklearn.base import clone
 
 iris = load_iris()
 
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 0402b421e5c56..1a48afeeb48db 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -1,32 +1,29 @@
 import pickle
-from unittest.mock import Mock
 
-import numpy as np
+import joblib
 import pytest
+import numpy as np
 import scipy.sparse as sp
+from unittest.mock import Mock
 
-import joblib
-from sklearn import datasets, linear_model, metrics
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import ignore_warnings
+
+from sklearn import linear_model, datasets, metrics
 from sklearn.base import clone, is_classifier
-from sklearn.exceptions import ConvergenceWarning
+from sklearn.svm import OneClassSVM
+from sklearn.preprocessing import LabelEncoder, scale, MinMaxScaler
+from sklearn.preprocessing import StandardScaler
 from sklearn.kernel_approximation import Nystroem
+from sklearn.pipeline import make_pipeline
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit
 from sklearn.linear_model import _sgd_fast as sgd_fast
 from sklearn.linear_model import _stochastic_gradient
-from sklearn.model_selection import (
-    RandomizedSearchCV,
-    ShuffleSplit,
-    StratifiedShuffleSplit,
-)
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, scale
-from sklearn.svm import OneClassSVM
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
+from sklearn.model_selection import RandomizedSearchCV
 
 
 def _update_kwargs(kwargs):
diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
index c1513ecc0c10b..b9d87e5207b7e 100644
--- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
+++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py
@@ -1,16 +1,17 @@
 import numpy as np
+from numpy.testing import assert_allclose
 import pytest
 import scipy.sparse as sp
-from numpy.testing import assert_allclose
 
 from sklearn.datasets import make_regression
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal
+
+from sklearn.utils._testing import ignore_warnings
 from sklearn.exceptions import ConvergenceWarning
-from sklearn.linear_model import ElasticNet, ElasticNetCV, Lasso, LassoCV
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    ignore_warnings,
-)
+
+from sklearn.linear_model import Lasso, ElasticNet, LassoCV, ElasticNetCV
+
 
 # FIXME: 'normalize' to be removed in 1.2
 filterwarnings_normalize = pytest.mark.filterwarnings(
diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py
index 6befe6e836796..b067fa07224ed 100644
--- a/sklearn/linear_model/tests/test_theil_sen.py
+++ b/sklearn/linear_model/tests/test_theil_sen.py
@@ -8,24 +8,16 @@
 import re
 import sys
 from contextlib import contextmanager
-
 import numpy as np
 import pytest
-from numpy.testing import (
-    assert_array_almost_equal,
-    assert_array_equal,
-    assert_array_less,
-)
+from numpy.testing import assert_array_equal, assert_array_less
+from numpy.testing import assert_array_almost_equal
 from scipy.linalg import norm
 from scipy.optimize import fmin_bfgs
-
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LinearRegression, TheilSenRegressor
-from sklearn.linear_model._theil_sen import (
-    _breakdown_point,
-    _modified_weiszfeld_step,
-    _spatial_median,
-)
+from sklearn.linear_model._theil_sen import _spatial_median, _breakdown_point
+from sklearn.linear_model._theil_sen import _modified_weiszfeld_step
 from sklearn.utils._testing import assert_almost_equal
 
 
diff --git a/sklearn/manifold/__init__.py b/sklearn/manifold/__init__.py
index 1e8d96c7cf94b..ae708aa1fd65c 100644
--- a/sklearn/manifold/__init__.py
+++ b/sklearn/manifold/__init__.py
@@ -2,8 +2,8 @@
 The :mod:`sklearn.manifold` module implements data embedding techniques.
 """
 
+from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding
 from ._isomap import Isomap
-from ._locally_linear import LocallyLinearEmbedding, locally_linear_embedding
 from ._mds import MDS, smacof
 from ._spectral_embedding import SpectralEmbedding, spectral_embedding
 from ._t_sne import TSNE, trustworthiness
diff --git a/sklearn/manifold/_barnes_hut_tsne.pyx b/sklearn/manifold/_barnes_hut_tsne.pyx
index 6b1c1886bdb01..2d314c0ccf3a5 100644
--- a/sklearn/manifold/_barnes_hut_tsne.pyx
+++ b/sklearn/manifold/_barnes_hut_tsne.pyx
@@ -6,12 +6,11 @@
 
 import numpy as np
-
 cimport numpy as np
-from cython.parallel cimport parallel, prange
-from libc.math cimport log, sqrt
 from libc.stdio cimport printf
-from libc.stdlib cimport free, malloc
+from libc.math cimport sqrt, log
+from libc.stdlib cimport malloc, free
+from cython.parallel cimport prange, parallel
 
 from ..neighbors._quad_tree cimport _QuadTree
 
diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py
index 9d15f6162105d..aae9f09fd5a94 100644
--- a/sklearn/manifold/_isomap.py
+++ b/sklearn/manifold/_isomap.py
@@ -5,15 +5,18 @@
 
 import warnings
 
 import numpy as np
+
 from scipy.sparse import issparse
-from scipy.sparse.csgraph import connected_components, shortest_path
+from scipy.sparse.csgraph import shortest_path
+from scipy.sparse.csgraph import connected_components
 
 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
+from ..neighbors import NearestNeighbors, kneighbors_graph
+from ..neighbors import radius_neighbors_graph
+from ..utils.validation import check_is_fitted
 from ..decomposition import KernelPCA
-from ..neighbors import NearestNeighbors, kneighbors_graph, radius_neighbors_graph
 from ..preprocessing import KernelCenterer
 from ..utils.graph import _fix_connected_components
-from ..utils.validation import check_is_fitted
 
 
 class Isomap(_ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py
index 095d4f63201e7..a9c6ec350b912 100644
--- a/sklearn/manifold/_locally_linear.py
+++ b/sklearn/manifold/_locally_linear.py
@@ -5,21 +5,22 @@
 # License: BSD 3 clause (C) INRIA 2011
 
 import numpy as np
-from scipy.linalg import eigh, qr, solve, svd
-from scipy.sparse import csr_matrix, eye
+from scipy.linalg import eigh, svd, qr, solve
+from scipy.sparse import eye, csr_matrix
 from scipy.sparse.linalg import eigsh
 
 from ..base import (
     BaseEstimator,
     TransformerMixin,
-    _ClassNamePrefixFeaturesOutMixin,
     _UnstableArchMixin,
+    _ClassNamePrefixFeaturesOutMixin,
 )
-from ..neighbors import NearestNeighbors
-from ..utils import check_array, check_random_state
+from ..utils import check_random_state, check_array
 from ..utils._arpack import _init_arpack_v0
 from ..utils.extmath import stable_cumsum
-from ..utils.validation import FLOAT_DTYPES, check_is_fitted
+from ..utils.validation import check_is_fitted
+from ..utils.validation import FLOAT_DTYPES
+from ..neighbors import NearestNeighbors
 
 
 def barycenter_weights(X, Y, indices, reg=1e-3):
diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py
index 157349a5ed08c..930f8d19b7b5e 100644
--- a/sklearn/manifold/_mds.py
+++ b/sklearn/manifold/_mds.py
@@ -5,16 +5,15 @@
 # author: Nelle Varoquaux
 # License: BSD
 
-import warnings
-
 import numpy as np
-
 from joblib import Parallel, effective_n_jobs
 
+import warnings
+
 from ..base import BaseEstimator
-from ..isotonic import IsotonicRegression
 from ..metrics import euclidean_distances
..utils import check_array, check_random_state, check_symmetric +from ..utils import check_random_state, check_array, check_symmetric +from ..isotonic import IsotonicRegression from ..utils.fixes import delayed diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index fe437b4ae2d30..54a72313491ee 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -10,17 +10,21 @@ import numpy as np from scipy import sparse from scipy.linalg import eigh +from scipy.sparse.linalg import eigsh from scipy.sparse.csgraph import connected_components from scipy.sparse.csgraph import laplacian as csgraph_laplacian -from scipy.sparse.linalg import eigsh from ..base import BaseEstimator -from ..metrics.pairwise import rbf_kernel -from ..neighbors import NearestNeighbors, kneighbors_graph -from ..utils import check_array, check_random_state, check_symmetric +from ..utils import ( + check_array, + check_random_state, + check_symmetric, +) from ..utils._arpack import _init_arpack_v0 from ..utils.extmath import _deterministic_vector_sign_flip from ..utils.fixes import lobpcg +from ..metrics.pairwise import rbf_kernel +from ..neighbors import kneighbors_graph, NearestNeighbors def _graph_connected_component(graph, node_id): diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py index 35fc0ee784a1a..5b7a3c4efd753 100644 --- a/sklearn/manifold/_t_sne.py +++ b/sklearn/manifold/_t_sne.py @@ -10,25 +10,26 @@ import warnings from time import time - import numpy as np from scipy import linalg +from scipy.spatial.distance import pdist +from scipy.spatial.distance import squareform from scipy.sparse import csr_matrix, issparse -from scipy.spatial.distance import pdist, squareform - -from ..base import BaseEstimator -from ..decomposition import PCA -from ..metrics.pairwise import pairwise_distances from ..neighbors import NearestNeighbors +from ..base import BaseEstimator from ..utils import check_random_state from ..utils._openmp_helpers import _openmp_effective_n_threads from ..utils.validation import check_non_negative +from ..decomposition import PCA +from ..metrics.pairwise import pairwise_distances -# mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne' # mypy error: Module 'sklearn.manifold' has no attribute '_utils' -from . import _barnes_hut_tsne # type: ignore from . import _utils # type: ignore +# mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne' +from . 
import _barnes_hut_tsne # type: ignore + + MACHINE_EPSILON = np.finfo(np.double).eps diff --git a/sklearn/manifold/_utils.pyx b/sklearn/manifold/_utils.pyx index efcafaab07270..985aa3388d34c 100644 --- a/sklearn/manifold/_utils.pyx +++ b/sklearn/manifold/_utils.pyx @@ -1,8 +1,6 @@ -cimport cython from libc cimport math - +cimport cython import numpy as np - cimport numpy as np from libc.stdio cimport printf diff --git a/sklearn/manifold/tests/test_isomap.py b/sklearn/manifold/tests/test_isomap.py index 6e83f716b9731..73365b08a5cfb 100644 --- a/sklearn/manifold/tests/test_isomap.py +++ b/sklearn/manifold/tests/test_isomap.py @@ -1,20 +1,24 @@ -import math from itertools import product - import numpy as np -import pytest +import math from numpy.testing import ( assert_almost_equal, assert_array_almost_equal, assert_array_equal, ) -from scipy.sparse import rand as sparse_rand +import pytest -from sklearn import datasets, manifold, neighbors, pipeline, preprocessing +from sklearn import datasets +from sklearn import manifold +from sklearn import neighbors +from sklearn import pipeline +from sklearn import preprocessing from sklearn.datasets import make_blobs from sklearn.metrics.pairwise import pairwise_distances from sklearn.utils._testing import assert_allclose, assert_allclose_dense_sparse +from scipy.sparse import rand as sparse_rand + eigen_solvers = ["auto", "dense", "arpack"] path_methods = ["auto", "FW", "D"] diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index 9dc1050c6bd29..4272aa05b71a0 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -1,13 +1,17 @@ from itertools import product import numpy as np -import pytest +from sklearn.utils._testing import ( + assert_allclose, + assert_array_equal, +) from scipy import linalg +import pytest -from sklearn import manifold, neighbors +from sklearn import neighbors, manifold from sklearn.datasets import make_blobs from sklearn.manifold._locally_linear import barycenter_kneighbors_graph -from sklearn.utils._testing import assert_allclose, assert_array_equal, ignore_warnings +from sklearn.utils._testing import ignore_warnings eigen_solvers = ["dense", "arpack"] @@ -130,7 +134,7 @@ def test_pipeline(): # check that LocallyLinearEmbedding works fine as a Pipeline # only checks that no error is raised. 
# TODO check that it actually does something useful - from sklearn import datasets, pipeline + from sklearn import pipeline, datasets X, y = datasets.make_blobs(random_state=0) clf = pipeline.Pipeline( diff --git a/sklearn/manifold/tests/test_mds.py b/sklearn/manifold/tests/test_mds.py index 57a30f2022207..242549f248f88 100644 --- a/sklearn/manifold/tests/test_mds.py +++ b/sklearn/manifold/tests/test_mds.py @@ -1,6 +1,6 @@ import numpy as np -import pytest from numpy.testing import assert_array_almost_equal +import pytest from sklearn.manifold import _mds as mds diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index bb06b27f54489..935e5408a4159 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -1,21 +1,23 @@ -import numpy as np import pytest + +import numpy as np + from scipy import sparse -from scipy.linalg import eigh from scipy.sparse import csgraph +from scipy.linalg import eigh -from sklearn.cluster import KMeans -from sklearn.datasets import make_blobs -from sklearn.manifold import SpectralEmbedding, spectral_embedding -from sklearn.manifold._spectral_embedding import ( - _graph_connected_component, - _graph_is_connected, -) -from sklearn.metrics import normalized_mutual_info_score +from sklearn.manifold import SpectralEmbedding +from sklearn.manifold._spectral_embedding import _graph_is_connected +from sklearn.manifold._spectral_embedding import _graph_connected_component +from sklearn.manifold import spectral_embedding from sklearn.metrics.pairwise import rbf_kernel +from sklearn.metrics import normalized_mutual_info_score from sklearn.neighbors import NearestNeighbors -from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal +from sklearn.cluster import KMeans +from sklearn.datasets import make_blobs from sklearn.utils.extmath import _deterministic_vector_sign_flip +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_array_equal try: from pyamg import smoothed_aggregation_solver # noqa diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index bbd26743317b3..861500e4a8891 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -1,43 +1,39 @@ import sys -import warnings from io import StringIO - import numpy as np -import pytest -import scipy.sparse as sp from numpy.testing import assert_allclose -from scipy.optimize import check_grad -from scipy.spatial.distance import pdist, squareform +import scipy.sparse as sp +import pytest +import warnings -from sklearn.datasets import make_blobs +from sklearn.neighbors import NearestNeighbors +from sklearn.neighbors import kneighbors_graph from sklearn.exceptions import EfficiencyWarning +from sklearn.utils._testing import ignore_warnings +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import skip_if_32bit +from sklearn.utils import check_random_state +from sklearn.manifold._t_sne import _joint_probabilities +from sklearn.manifold._t_sne import _joint_probabilities_nn +from sklearn.manifold._t_sne import _kl_divergence +from sklearn.manifold._t_sne import _kl_divergence_bh +from sklearn.manifold._t_sne import _gradient_descent +from sklearn.manifold._t_sne import trustworthiness +from sklearn.manifold 
import TSNE
 # mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
 from sklearn.manifold import _barnes_hut_tsne  # type: ignore
-from sklearn.manifold import TSNE
-from sklearn.manifold._t_sne import (
-    _gradient_descent,
-    _joint_probabilities,
-    _joint_probabilities_nn,
-    _kl_divergence,
-    _kl_divergence_bh,
-    trustworthiness,
-)
 from sklearn.manifold._utils import _binary_search_perplexity
-from sklearn.metrics.pairwise import (
-    cosine_distances,
-    manhattan_distances,
-    pairwise_distances,
-)
-from sklearn.neighbors import NearestNeighbors, kneighbors_graph
-from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-    skip_if_32bit,
-)
+from sklearn.datasets import make_blobs
+from scipy.optimize import check_grad
+from scipy.spatial.distance import pdist
+from scipy.spatial.distance import squareform
+from sklearn.metrics.pairwise import pairwise_distances
+from sklearn.metrics.pairwise import manhattan_distances
+from sklearn.metrics.pairwise import cosine_distances
+
 x = np.linspace(0, 1, 10)
 xx, yy = np.meshgrid(x, x)
diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py
index 37d5c89fce485..0c6f74a8b7f38 100644
--- a/sklearn/metrics/__init__.py
+++ b/sklearn/metrics/__init__.py
@@ -4,94 +4,100 @@
 """
 
-from . import cluster
-from ._classification import (
-    accuracy_score,
-    balanced_accuracy_score,
-    brier_score_loss,
-    classification_report,
-    cohen_kappa_score,
-    confusion_matrix,
-    f1_score,
-    fbeta_score,
-    hamming_loss,
-    hinge_loss,
-    jaccard_score,
-    log_loss,
-    matthews_corrcoef,
-    multilabel_confusion_matrix,
-    precision_recall_fscore_support,
-    precision_score,
-    recall_score,
-    zero_one_loss,
-)
+from ._ranking import auc
+from ._ranking import average_precision_score
+from ._ranking import coverage_error
+from ._ranking import det_curve
+from ._ranking import dcg_score
+from ._ranking import label_ranking_average_precision_score
+from ._ranking import label_ranking_loss
+from ._ranking import ndcg_score
+from ._ranking import precision_recall_curve
+from ._ranking import roc_auc_score
+from ._ranking import roc_curve
+from ._ranking import top_k_accuracy_score
+
+from ._classification import accuracy_score
+from ._classification import balanced_accuracy_score
+from ._classification import classification_report
+from ._classification import cohen_kappa_score
+from ._classification import confusion_matrix
+from ._classification import f1_score
+from ._classification import fbeta_score
+from ._classification import hamming_loss
+from ._classification import hinge_loss
+from ._classification import jaccard_score
+from ._classification import log_loss
+from ._classification import matthews_corrcoef
+from ._classification import precision_recall_fscore_support
+from ._classification import precision_score
+from ._classification import recall_score
+from ._classification import zero_one_loss
+from ._classification import brier_score_loss
+from ._classification import multilabel_confusion_matrix
+
 from ._dist_metrics import DistanceMetric
-from ._plot.confusion_matrix import ConfusionMatrixDisplay, plot_confusion_matrix
-from ._plot.det_curve import DetCurveDisplay, plot_det_curve
-from ._plot.precision_recall_curve import (
-    PrecisionRecallDisplay,
-    plot_precision_recall_curve,
-)
-from ._plot.roc_curve import RocCurveDisplay, plot_roc_curve
-from ._ranking import (
-    auc,
-    average_precision_score,
-    coverage_error,
-    dcg_score,
-    det_curve,
-    label_ranking_average_precision_score,
-    label_ranking_loss,
-    ndcg_score,
-    precision_recall_curve,
-    roc_auc_score,
-    roc_curve,
-    top_k_accuracy_score,
-)
-from ._regression import (
-    d2_absolute_error_score,
-    d2_pinball_score,
-    d2_tweedie_score,
-    explained_variance_score,
-    max_error,
-    mean_absolute_error,
-    mean_absolute_percentage_error,
-    mean_gamma_deviance,
-    mean_pinball_loss,
-    mean_poisson_deviance,
-    mean_squared_error,
-    mean_squared_log_error,
-    mean_tweedie_deviance,
-    median_absolute_error,
-    r2_score,
-)
-from ._scorer import SCORERS, check_scoring, get_scorer, get_scorer_names, make_scorer
-from .cluster import (
-    adjusted_mutual_info_score,
-    adjusted_rand_score,
-    calinski_harabasz_score,
-    completeness_score,
-    consensus_score,
-    davies_bouldin_score,
-    fowlkes_mallows_score,
-    homogeneity_completeness_v_measure,
-    homogeneity_score,
-    mutual_info_score,
-    normalized_mutual_info_score,
-    pair_confusion_matrix,
-    rand_score,
-    silhouette_samples,
-    silhouette_score,
-    v_measure_score,
-)
-from .pairwise import (
-    euclidean_distances,
-    nan_euclidean_distances,
-    pairwise_distances,
-    pairwise_distances_argmin,
-    pairwise_distances_argmin_min,
-    pairwise_distances_chunked,
-    pairwise_kernels,
-)
+
+from . import cluster
+from .cluster import adjusted_mutual_info_score
+from .cluster import adjusted_rand_score
+from .cluster import rand_score
+from .cluster import pair_confusion_matrix
+from .cluster import completeness_score
+from .cluster import consensus_score
+from .cluster import homogeneity_completeness_v_measure
+from .cluster import homogeneity_score
+from .cluster import mutual_info_score
+from .cluster import normalized_mutual_info_score
+from .cluster import fowlkes_mallows_score
+from .cluster import silhouette_samples
+from .cluster import silhouette_score
+from .cluster import calinski_harabasz_score
+from .cluster import v_measure_score
+from .cluster import davies_bouldin_score
+
+from .pairwise import euclidean_distances
+from .pairwise import nan_euclidean_distances
+from .pairwise import pairwise_distances
+from .pairwise import pairwise_distances_argmin
+from .pairwise import pairwise_distances_argmin_min
+from .pairwise import pairwise_kernels
+from .pairwise import pairwise_distances_chunked
+
+from ._regression import explained_variance_score
+from ._regression import max_error
+from ._regression import mean_absolute_error
+from ._regression import mean_squared_error
+from ._regression import mean_squared_log_error
+from ._regression import median_absolute_error
+from ._regression import mean_absolute_percentage_error
+from ._regression import mean_pinball_loss
+from ._regression import r2_score
+from ._regression import mean_tweedie_deviance
+from ._regression import mean_poisson_deviance
+from ._regression import mean_gamma_deviance
+from ._regression import d2_tweedie_score
+from ._regression import d2_pinball_score
+from ._regression import d2_absolute_error_score
+
+
+from ._scorer import check_scoring
+from ._scorer import make_scorer
+from ._scorer import SCORERS
+from ._scorer import get_scorer
+from ._scorer import get_scorer_names
+
+
+from ._plot.det_curve import plot_det_curve
+from ._plot.det_curve import DetCurveDisplay
+from ._plot.roc_curve import plot_roc_curve
+from ._plot.roc_curve import RocCurveDisplay
+from ._plot.precision_recall_curve import plot_precision_recall_curve
+from ._plot.precision_recall_curve import PrecisionRecallDisplay
+
+from ._plot.confusion_matrix import plot_confusion_matrix
+from ._plot.confusion_matrix import ConfusionMatrixDisplay
+
 
 __all__ = [
     "accuracy_score",
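The `sklearn/metrics/__init__.py` hunk above is purely mechanical: the same re-exports move between a combined, parenthesized `from` import and a one-import-per-line layout. As a minimal sketch of how such a rewrite can be automated with isort's Python API (assuming isort >= 5; the `profile="black"` and `line_length=88` values are assumptions about the repository's configuration, not something this hunk encodes):

# Minimal sketch (not part of the patch): isort.code() merges single-line
# imports from the same module into one combined import.
import isort

single_line_style = (
    "from ._ranking import auc\n"
    "from ._ranking import average_precision_score\n"
    "from ._ranking import roc_curve\n"
)

# isort.code() returns the rewritten source as a string; under the assumed
# black profile, imports longer than line_length are wrapped in parentheses.
print(isort.code(single_line_style, profile="black", line_length=88))
# from ._ranking import auc, average_precision_score, roc_curve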
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index fa3c4d81c6c9c..d759f6c4b3e76 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -24,21 +24,23 @@
 import warnings
-
 import numpy as np
-from scipy.sparse import coo_matrix, csr_matrix
-from ..exceptions import UndefinedMetricWarning
-from ..preprocessing import LabelBinarizer, LabelEncoder
-from ..utils import (
-    assert_all_finite,
-    check_array,
-    check_consistent_length,
-    column_or_1d,
-)
-from ..utils.multiclass import type_of_target, unique_labels
-from ..utils.sparsefuncs import count_nonzero
+from scipy.sparse import coo_matrix
+from scipy.sparse import csr_matrix
+
+from ..preprocessing import LabelBinarizer
+from ..preprocessing import LabelEncoder
+from ..utils import assert_all_finite
+from ..utils import check_array
+from ..utils import check_consistent_length
+from ..utils import column_or_1d
+from ..utils.multiclass import unique_labels
+from ..utils.multiclass import type_of_target
 from ..utils.validation import _num_samples
+from ..utils.sparsefuncs import count_nonzero
+from ..exceptions import UndefinedMetricWarning
+
 from ._base import _check_pos_label_consistency
diff --git a/sklearn/metrics/_dist_metrics.pxd b/sklearn/metrics/_dist_metrics.pxd
index d4aefec5cc9f3..2c45bd6f40dff 100644
--- a/sklearn/metrics/_dist_metrics.pxd
+++ b/sklearn/metrics/_dist_metrics.pxd
@@ -1,9 +1,8 @@
 cimport numpy as cnp
-from libc.math cimport exp, sqrt
+from libc.math cimport sqrt, exp
 from ..utils._typedefs cimport DTYPE_t, ITYPE_t
-
 ######################################################################
 # Inline distance functions
 #
diff --git a/sklearn/metrics/_dist_metrics.pyx b/sklearn/metrics/_dist_metrics.pyx
index 2c74dd5d75962..e3ad251a23e01 100644
--- a/sklearn/metrics/_dist_metrics.pyx
+++ b/sklearn/metrics/_dist_metrics.pyx
@@ -3,7 +3,6 @@
 # License: BSD
 
 import numpy as np
-
 cimport numpy as cnp
 
 from cython cimport final
@@ -26,19 +25,14 @@ cdef inline cnp.ndarray _buffer_to_ndarray(const DTYPE_t* x, cnp.npy_intp n):
     return PyArray_SimpleNewFromData(1, &n, DTYPECODE, x)
 
-from libc.math cimport asin, cos, exp, fabs, pow, sin, sqrt
-
-
+from libc.math cimport fabs, sqrt, exp, pow, cos, sin, asin
 cdef DTYPE_t INF = np.inf
 
 from scipy.sparse import csr_matrix, issparse
-
-from ..utils._typedefs cimport DTYPECODE, DTYPE_t, ITYPE_t
-
-from ..utils import check_array
-from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
+from ..utils._typedefs cimport DTYPE_t, ITYPE_t, DTYPECODE
 from ..utils._typedefs import DTYPE, ITYPE
-
+from ..utils._readonly_array_wrapper import ReadonlyArrayWrapper
+from ..utils import check_array
 ######################################################################
 # newObj function
diff --git a/sklearn/metrics/_pairwise_distances_reduction.pyx b/sklearn/metrics/_pairwise_distances_reduction.pyx
index d1a6f1b8776f5..9191efae2a8da 100644
--- a/sklearn/metrics/_pairwise_distances_reduction.pyx
+++ b/sklearn/metrics/_pairwise_distances_reduction.pyx
@@ -14,49 +14,45 @@
 # (using Cython prange loops) which gives another multiplicative speed-up in
 # favorable cases on many-core machines.
 cimport numpy as cnp
-
-import warnings
-
 import numpy as np
+import warnings
 
 from .. import get_config
-
-from cpython.ref cimport Py_INCREF
+from libc.stdlib cimport free, malloc
+from libc.float cimport DBL_MAX
+from libcpp.memory cimport shared_ptr, make_shared
+from libcpp.vector cimport vector
 from cython cimport final
 from cython.operator cimport dereference as deref
 from cython.parallel cimport parallel, prange
-from libc.float cimport DBL_MAX
-from libc.stdlib cimport free, malloc
-from libcpp.memory cimport make_shared, shared_ptr
-from libcpp.vector cimport vector
+from cpython.ref cimport Py_INCREF
+from ._dist_metrics cimport DatasetsPair, DenseDenseDatasetsPair
 from ..utils._cython_blas cimport (
-    BLAS_Order,
-    BLAS_Trans,
-    ColMajor,
-    NoTrans,
-    RowMajor,
-    Trans,
-    _dot,
-    _gemm,
+  BLAS_Order,
+  BLAS_Trans,
+  ColMajor,
+  NoTrans,
+  RowMajor,
+  Trans,
+  _dot,
+  _gemm,
 )
 from ..utils._heap cimport heap_push
-from ..utils._openmp_helpers cimport _openmp_thread_num
 from ..utils._sorting cimport simultaneous_sort
-from ..utils._typedefs cimport DTYPE_t, ITYPE_t
+from ..utils._openmp_helpers cimport _openmp_thread_num
+from ..utils._typedefs cimport ITYPE_t, DTYPE_t
 from ..utils._vector_sentinel cimport vector_to_nd_array
-from ._dist_metrics cimport DatasetsPair, DenseDenseDatasetsPair
 
 from numbers import Integral, Real
 from typing import List
-
 from scipy.sparse import issparse
-
-from ..utils import _in_unstable_openblas_configuration, check_scalar
-from ..utils._openmp_helpers import _openmp_effective_n_threads
-from ..utils._typedefs import DTYPE, ITYPE
-from ..utils.fixes import threadpool_limits
 from ._dist_metrics import BOOL_METRICS, METRIC_MAPPING
+from ..utils import check_scalar, _in_unstable_openblas_configuration
+from ..utils.fixes import threadpool_limits
+from ..utils._openmp_helpers import _openmp_effective_n_threads
+from ..utils._typedefs import ITYPE, DTYPE
+
 
 cnp.import_array()
diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py
index c84cca834e494..590a95970a0e4 100644
--- a/sklearn/metrics/_plot/confusion_matrix.py
+++ b/sklearn/metrics/_plot/confusion_matrix.py
@@ -2,10 +2,11 @@
 
 import numpy as np
 
-from ...base import is_classifier
-from ...utils import check_matplotlib_support, deprecated
-from ...utils.multiclass import unique_labels
 from .. import confusion_matrix
+from ...utils import check_matplotlib_support
+from ...utils import deprecated
+from ...utils.multiclass import unique_labels
+from ...base import is_classifier
 
 
 class ConfusionMatrixDisplay:
diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py
index a5a4e41b2622d..92e84ce9b7974 100644
--- a/sklearn/metrics/_plot/det_curve.py
+++ b/sklearn/metrics/_plot/det_curve.py
@@ -1,10 +1,13 @@
 import scipy as sp
 
-from ...utils import check_matplotlib_support, deprecated
-from .._base import _check_pos_label_consistency
-from .._ranking import det_curve
 from .base import _get_response
+from .. import det_curve
+from .._base import _check_pos_label_consistency
+
+from ...utils import check_matplotlib_support
+from ...utils import deprecated
+
 
 class DetCurveDisplay:
     """DET curve visualization.
diff --git a/sklearn/metrics/_plot/precision_recall_curve.py b/sklearn/metrics/_plot/precision_recall_curve.py
index cc9408d90a131..b3ccab0825703 100644
--- a/sklearn/metrics/_plot/precision_recall_curve.py
+++ b/sklearn/metrics/_plot/precision_recall_curve.py
@@ -1,10 +1,12 @@
 from sklearn.base import is_classifier
+from .base import _get_response
 
-from ...utils import check_matplotlib_support, deprecated
+from .. import average_precision_score
+from .. import precision_recall_curve
 from .._base import _check_pos_label_consistency
 from .._classification import check_consistent_length
-from .._ranking import average_precision_score, precision_recall_curve
-from .base import _get_response
+
+from ...utils import check_matplotlib_support, deprecated
 
 
 class PrecisionRecallDisplay:
diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py
index b2fb09225ed55..a56cd3755b8d6 100644
--- a/sklearn/metrics/_plot/roc_curve.py
+++ b/sklearn/metrics/_plot/roc_curve.py
@@ -1,8 +1,11 @@
-from ...utils import check_matplotlib_support, deprecated
-from .._base import _check_pos_label_consistency
-from .._ranking import auc, roc_curve
 from .base import _get_response
+
+from .. import auc
+from .. import roc_curve
+from .._base import _check_pos_label_consistency
+
+from ...utils import check_matplotlib_support, deprecated
+
 
 class RocCurveDisplay:
     """ROC Curve visualization.
diff --git a/sklearn/metrics/_plot/tests/test_base.py b/sklearn/metrics/_plot/tests/test_base.py
index fa860968e6044..2f67d7dd223f4 100644
--- a/sklearn/metrics/_plot/tests/test_base.py
+++ b/sklearn/metrics/_plot/tests/test_base.py
@@ -3,9 +3,10 @@
 
 from sklearn.datasets import load_iris
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics._plot.base import _get_response
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 
+from sklearn.metrics._plot.base import _get_response
+
 
 @pytest.mark.parametrize(
     "estimator, err_msg, params",
diff --git a/sklearn/metrics/_plot/tests/test_common_curve_display.py b/sklearn/metrics/_plot/tests/test_common_curve_display.py
index 0a0a31f0c0c17..5ed036b77f4d0 100644
--- a/sklearn/metrics/_plot/tests/test_common_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_common_curve_display.py
@@ -5,11 +5,16 @@
 from sklearn.datasets import load_iris
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import DetCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.tree import DecisionTreeClassifier
 
+from sklearn.metrics import (
+    DetCurveDisplay,
+    PrecisionRecallDisplay,
+    RocCurveDisplay,
+)
+
 
 @pytest.fixture(scope="module")
 def data():
diff --git a/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py b/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py
index e0911b47d13b2..e826888b65f89 100644
--- a/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py
+++ b/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py
@@ -1,16 +1,22 @@
+from numpy.testing import (
+    assert_allclose,
+    assert_array_equal,
+)
 import numpy as np
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal
 
-from sklearn.compose import make_column_transformer
 from sklearn.datasets import make_classification
+from sklearn.compose import make_column_transformer
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC, SVR
 
+from sklearn.metrics import ConfusionMatrixDisplay
+from sklearn.metrics import confusion_matrix
+
+
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
diff --git a/sklearn/metrics/_plot/tests/test_det_curve_display.py b/sklearn/metrics/_plot/tests/test_det_curve_display.py
index 403ea70109577..5d7a26d5e49a0 100644
--- a/sklearn/metrics/_plot/tests/test_det_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_det_curve_display.py
@@ -1,10 +1,12 @@
-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose
 
 from sklearn.datasets import load_iris
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import DetCurveDisplay, det_curve
+
+from sklearn.metrics import det_curve
+from sklearn.metrics import DetCurveDisplay
 
 
 @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])
diff --git a/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py b/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py
index cc99298dbfc55..4a4c4a96a5b32 100644
--- a/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py
+++ b/sklearn/metrics/_plot/tests/test_plot_confusion_matrix.py
@@ -1,21 +1,22 @@
 # TODO: remove this file when plot_confusion_matrix will be deprecated in 1.2
-import numpy as np
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal
+import numpy as np
+from numpy.testing import assert_allclose
+from numpy.testing import assert_array_equal
 
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import make_classification
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import (
-    ConfusionMatrixDisplay,
-    confusion_matrix,
-    plot_confusion_matrix,
-)
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC, SVR
 
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import plot_confusion_matrix
+from sklearn.metrics import ConfusionMatrixDisplay
+
+
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
diff --git a/sklearn/metrics/_plot/tests/test_plot_curve_common.py b/sklearn/metrics/_plot/tests/test_plot_curve_common.py
index 728e0a2694a06..d430acd42596c 100644
--- a/sklearn/metrics/_plot/tests/test_plot_curve_common.py
+++ b/sklearn/metrics/_plot/tests/test_plot_curve_common.py
@@ -1,15 +1,18 @@
 import pytest
 
-from sklearn.base import ClassifierMixin, clone
+from sklearn.base import ClassifierMixin
+from sklearn.base import clone
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_iris
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import plot_det_curve, plot_roc_curve
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.tree import DecisionTreeClassifier
 
+from sklearn.metrics import plot_det_curve
+from sklearn.metrics import plot_roc_curve
+
 pytestmark = pytest.mark.filterwarnings(
     "ignore:Function plot_roc_curve is deprecated",
 )
diff --git a/sklearn/metrics/_plot/tests/test_plot_det_curve.py b/sklearn/metrics/_plot/tests/test_plot_det_curve.py
index ad10dc94c08cb..31f840a6a1ff5 100644
--- a/sklearn/metrics/_plot/tests/test_plot_det_curve.py
+++ b/sklearn/metrics/_plot/tests/test_plot_det_curve.py
@@ -1,11 +1,13 @@
 # TODO: remove this file when plot_det_curve will be deprecated in 1.2
-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose
 
 from sklearn.datasets import load_iris
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import det_curve, plot_det_curve
+
+from sklearn.metrics import det_curve
+from sklearn.metrics import plot_det_curve
 
 
 @pytest.fixture(scope="module")
diff --git a/sklearn/metrics/_plot/tests/test_plot_precision_recall.py b/sklearn/metrics/_plot/tests/test_plot_precision_recall.py
index a3fc3ce61253c..1d687b0c31abc 100644
--- a/sklearn/metrics/_plot/tests/test_plot_precision_recall.py
+++ b/sklearn/metrics/_plot/tests/test_plot_precision_recall.py
@@ -1,22 +1,21 @@
-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose
 
 from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.compose import make_column_transformer
-from sklearn.datasets import load_breast_cancer, make_classification
-from sklearn.exceptions import NotFittedError
+from sklearn.metrics import plot_precision_recall_curve
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import precision_recall_curve
+from sklearn.datasets import make_classification
+from sklearn.datasets import load_breast_cancer
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import (
-    average_precision_score,
-    plot_precision_recall_curve,
-    precision_recall_curve,
-)
 from sklearn.model_selection import train_test_split
+from sklearn.exceptions import NotFittedError
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.utils import shuffle
+from sklearn.compose import make_column_transformer
 
 pytestmark = pytest.mark.filterwarnings(
     # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
diff --git a/sklearn/metrics/_plot/tests/test_plot_roc_curve.py b/sklearn/metrics/_plot/tests/test_plot_roc_curve.py
index 60946f6a12baa..587d1c2d272d9 100644
--- a/sklearn/metrics/_plot/tests/test_plot_roc_curve.py
+++ b/sklearn/metrics/_plot/tests/test_plot_roc_curve.py
@@ -1,16 +1,19 @@
-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose
 
-from sklearn.compose import make_column_transformer
-from sklearn.datasets import load_breast_cancer, load_iris
-from sklearn.exceptions import NotFittedError
+from sklearn.metrics import plot_roc_curve
+from sklearn.metrics import roc_curve
+from sklearn.metrics import auc
+from sklearn.datasets import load_iris
+from sklearn.datasets import load_breast_cancer
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import auc, plot_roc_curve, roc_curve
 from sklearn.model_selection import train_test_split
+from sklearn.exceptions import NotFittedError
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
+from sklearn.compose import make_column_transformer
 
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
index b516486e59a9a..49b508942ab56 100644
--- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py
+++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
@@ -5,18 +5,15 @@
 from sklearn.datasets import load_breast_cancer, make_classification
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import (
-    PrecisionRecallDisplay,
-    average_precision_score,
-    plot_precision_recall_curve,
-    precision_recall_curve,
-)
+from sklearn.metrics import average_precision_score, precision_recall_curve
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import SVC, SVR
 from sklearn.utils import shuffle
 
+from sklearn.metrics import PrecisionRecallDisplay, plot_precision_recall_curve
+
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
     "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
index 13eec20cb5b7b..e8465b53747ec 100644
--- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -1,18 +1,26 @@
-import numpy as np
 import pytest
+import numpy as np
 from numpy.testing import assert_allclose
+
 from sklearn.compose import make_column_transformer
-from sklearn.datasets import load_breast_cancer, load_iris, make_classification
+from sklearn.datasets import load_iris
+
+from sklearn.datasets import load_breast_cancer, make_classification
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import RocCurveDisplay, auc, plot_roc_curve, roc_curve
+from sklearn.metrics import roc_curve
+from sklearn.metrics import auc
+
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
 
+from sklearn.metrics import RocCurveDisplay, plot_roc_curve
+
+
 @pytest.fixture(scope="module")
 def data():
     return load_iris(return_X_y=True)
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 3cf49851aa502..4e88bd5edc888 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -26,19 +26,17 @@
 from scipy.sparse import csr_matrix
 from scipy.stats import rankdata
 
+from ..utils import assert_all_finite
+from ..utils import check_consistent_length
+from ..utils.validation import _check_sample_weight
+from ..utils import column_or_1d, check_array
+from ..utils.multiclass import type_of_target
+from ..utils.extmath import stable_cumsum
+from ..utils.sparsefuncs import count_nonzero
 from ..exceptions import UndefinedMetricWarning
 from ..preprocessing import label_binarize
-from ..utils import (
-    assert_all_finite,
-    check_array,
-    check_consistent_length,
-    column_or_1d,
-)
 from ..utils._encode import _encode, _unique
-from ..utils.extmath import stable_cumsum
-from ..utils.multiclass import type_of_target
-from ..utils.sparsefuncs import count_nonzero
-from ..utils.validation import _check_sample_weight
+
 from ._base import (
     _average_binary_score,
     _average_multiclass_ovo_score,
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index f8d4c941b8fb9..57986692fb896 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -33,15 +33,16 @@
 from scipy.special import xlogy
 
 from ..exceptions import UndefinedMetricWarning
-from ..utils.stats import _weighted_percentile
 from ..utils.validation import (
-    _check_sample_weight,
-    _num_samples,
     check_array,
     check_consistent_length,
     check_scalar,
+    _num_samples,
     column_or_1d,
+    _check_sample_weight,
 )
+from ..utils.stats import _weighted_percentile
+
 
 __ALL__ = [
     "max_error",
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 652e314ac95cb..e1655af169fcc 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -18,52 +18,52 @@
 # Arnaud Joly
 # License: Simplified BSD
 
-import copy
-import warnings
-from collections import Counter
 from collections.abc import Iterable
 from functools import partial
+from collections import Counter
 
 import numpy as np
+import copy
+import warnings
 
-from ..base import is_regressor
-from ..utils.multiclass import type_of_target
 from . import (
-    accuracy_score,
-    average_precision_score,
-    balanced_accuracy_score,
-    brier_score_loss,
-    explained_variance_score,
-    f1_score,
-    jaccard_score,
-    log_loss,
-    matthews_corrcoef,
+    r2_score,
+    median_absolute_error,
     max_error,
     mean_absolute_error,
-    mean_absolute_percentage_error,
-    mean_gamma_deviance,
-    mean_poisson_deviance,
     mean_squared_error,
     mean_squared_log_error,
-    median_absolute_error,
+    mean_poisson_deviance,
+    mean_gamma_deviance,
+    accuracy_score,
+    top_k_accuracy_score,
+    f1_score,
+    roc_auc_score,
+    average_precision_score,
     precision_score,
-    r2_score,
     recall_score,
-    roc_auc_score,
-    top_k_accuracy_score,
-)
-from .cluster import (
-    adjusted_mutual_info_score,
-    adjusted_rand_score,
-    completeness_score,
-    fowlkes_mallows_score,
-    homogeneity_score,
-    mutual_info_score,
-    normalized_mutual_info_score,
-    rand_score,
-    v_measure_score,
+    log_loss,
+    balanced_accuracy_score,
+    explained_variance_score,
+    brier_score_loss,
+    jaccard_score,
+    mean_absolute_percentage_error,
+    matthews_corrcoef,
 )
+from .cluster import adjusted_rand_score
+from .cluster import rand_score
+from .cluster import homogeneity_score
+from .cluster import completeness_score
+from .cluster import v_measure_score
+from .cluster import mutual_info_score
+from .cluster import adjusted_mutual_info_score
+from .cluster import normalized_mutual_info_score
+from .cluster import fowlkes_mallows_score
+
+from ..utils.multiclass import type_of_target
+from ..base import is_regressor
+
 
 def _cached_call(cache, estimator, method, *args, **kwargs):
     """Call estimator with method and args and kwargs."""
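The `_scorer.py` hunk above, like the others in this patch, changes no behavior, only import layout. Whether a file already conforms can be tested without rewriting it; a minimal sketch, assuming isort >= 5 and its `check_code` helper (the `profile="black"` value is again an assumption):

# Minimal sketch (not part of the patch): isort.check_code() returns True
# when the snippet is already sorted under the given configuration, and
# with show_diff=True prints the would-be changes instead of applying them.
import isort

snippet = "import warnings\nimport copy\n"  # deliberately out of order

ok = isort.check_code(snippet, show_diff=True, profile="black")
print(ok)  # False: "copy" sorts before "warnings"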
""" +from ._supervised import adjusted_mutual_info_score +from ._supervised import normalized_mutual_info_score +from ._supervised import adjusted_rand_score +from ._supervised import rand_score +from ._supervised import completeness_score +from ._supervised import contingency_matrix +from ._supervised import pair_confusion_matrix +from ._supervised import expected_mutual_information +from ._supervised import homogeneity_completeness_v_measure +from ._supervised import homogeneity_score +from ._supervised import mutual_info_score +from ._supervised import v_measure_score +from ._supervised import fowlkes_mallows_score +from ._supervised import entropy +from ._unsupervised import silhouette_samples +from ._unsupervised import silhouette_score +from ._unsupervised import calinski_harabasz_score +from ._unsupervised import davies_bouldin_score from ._bicluster import consensus_score -from ._supervised import ( - adjusted_mutual_info_score, - adjusted_rand_score, - completeness_score, - contingency_matrix, - entropy, - expected_mutual_information, - fowlkes_mallows_score, - homogeneity_completeness_v_measure, - homogeneity_score, - mutual_info_score, - normalized_mutual_info_score, - pair_confusion_matrix, - rand_score, - v_measure_score, -) -from ._unsupervised import ( - calinski_harabasz_score, - davies_bouldin_score, - silhouette_samples, - silhouette_score, -) __all__ = [ "adjusted_mutual_info_score", diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py index cd06b63bafdbf..3ed314dfa96d2 100644 --- a/sklearn/metrics/cluster/_bicluster.py +++ b/sklearn/metrics/cluster/_bicluster.py @@ -1,7 +1,7 @@ import numpy as np from scipy.optimize import linear_sum_assignment -from ...utils.validation import check_array, check_consistent_length +from ...utils.validation import check_consistent_length, check_array __all__ = ["consensus_score"] diff --git a/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx b/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx index 50b5066d82e29..1f9c0dc78bf95 100644 --- a/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx +++ b/sklearn/metrics/cluster/_expected_mutual_info_fast.pyx @@ -3,12 +3,10 @@ # License: BSD 3 clause from libc.math cimport exp, lgamma - -import numpy as np from scipy.special import gammaln - -cimport cython +import numpy as np cimport numpy as cnp +cimport cython cnp.import_array() ctypedef cnp.float64_t DOUBLE diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index b2fc15cb14538..a6a66884b70b2 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -22,9 +22,9 @@ import numpy as np from scipy import sparse as sp +from ._expected_mutual_info_fast import expected_mutual_information from ...utils.multiclass import type_of_target from ...utils.validation import check_array, check_consistent_length -from ._expected_mutual_info_fast import expected_mutual_information def check_clusterings(labels_true, labels_pred): diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index fb234f2cbfb6f..3c25330cde707 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -10,9 +10,12 @@ import numpy as np +from ...utils import check_random_state +from ...utils import check_X_y +from ...utils import _safe_indexing +from ..pairwise import pairwise_distances_chunked +from ..pairwise import pairwise_distances from ...preprocessing import 
LabelEncoder -from ...utils import _safe_indexing, check_random_state, check_X_y -from ..pairwise import pairwise_distances, pairwise_distances_chunked def check_number_of_labels(n_labels, n_samples): diff --git a/sklearn/metrics/cluster/tests/test_bicluster.py b/sklearn/metrics/cluster/tests/test_bicluster.py index 53f7805100a13..2cbcb6e6826c7 100644 --- a/sklearn/metrics/cluster/tests/test_bicluster.py +++ b/sklearn/metrics/cluster/tests/test_bicluster.py @@ -2,10 +2,11 @@ import numpy as np -from sklearn.metrics import consensus_score -from sklearn.metrics.cluster._bicluster import _jaccard from sklearn.utils._testing import assert_almost_equal +from sklearn.metrics.cluster._bicluster import _jaccard +from sklearn.metrics import consensus_score + def test_jaccard(): a1 = np.array([True, True, False, False]) diff --git a/sklearn/metrics/cluster/tests/test_common.py b/sklearn/metrics/cluster/tests/test_common.py index bc32b7df7f561..a4e8c4530dbe6 100644 --- a/sklearn/metrics/cluster/tests/test_common.py +++ b/sklearn/metrics/cluster/tests/test_common.py @@ -1,25 +1,25 @@ from functools import partial from itertools import chain -import numpy as np import pytest +import numpy as np + +from sklearn.metrics.cluster import adjusted_mutual_info_score +from sklearn.metrics.cluster import adjusted_rand_score +from sklearn.metrics.cluster import rand_score +from sklearn.metrics.cluster import completeness_score +from sklearn.metrics.cluster import fowlkes_mallows_score +from sklearn.metrics.cluster import homogeneity_score +from sklearn.metrics.cluster import mutual_info_score +from sklearn.metrics.cluster import normalized_mutual_info_score +from sklearn.metrics.cluster import v_measure_score +from sklearn.metrics.cluster import silhouette_score +from sklearn.metrics.cluster import calinski_harabasz_score +from sklearn.metrics.cluster import davies_bouldin_score -from sklearn.metrics.cluster import ( - adjusted_mutual_info_score, - adjusted_rand_score, - calinski_harabasz_score, - completeness_score, - davies_bouldin_score, - fowlkes_mallows_score, - homogeneity_score, - mutual_info_score, - normalized_mutual_info_score, - rand_score, - silhouette_score, - v_measure_score, -) from sklearn.utils._testing import assert_allclose + # Dictionaries of metrics # ------------------------ # The goal of having those dictionaries is to have an easy way to call a diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index dfaa58ff62c01..4356a0a05286c 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -2,27 +2,28 @@ import numpy as np import pytest -from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal -from sklearn.metrics.cluster import ( - adjusted_mutual_info_score, - adjusted_rand_score, - completeness_score, - contingency_matrix, - entropy, - expected_mutual_information, - fowlkes_mallows_score, - homogeneity_completeness_v_measure, - homogeneity_score, - mutual_info_score, - normalized_mutual_info_score, - pair_confusion_matrix, - rand_score, - v_measure_score, -) -from sklearn.metrics.cluster._supervised import _generalized_average, check_clusterings +from sklearn.metrics.cluster import adjusted_mutual_info_score +from sklearn.metrics.cluster import adjusted_rand_score +from sklearn.metrics.cluster import rand_score +from sklearn.metrics.cluster import completeness_score +from sklearn.metrics.cluster import contingency_matrix +from 
sklearn.metrics.cluster import pair_confusion_matrix +from sklearn.metrics.cluster import entropy +from sklearn.metrics.cluster import expected_mutual_information +from sklearn.metrics.cluster import fowlkes_mallows_score +from sklearn.metrics.cluster import homogeneity_completeness_v_measure +from sklearn.metrics.cluster import homogeneity_score +from sklearn.metrics.cluster import mutual_info_score +from sklearn.metrics.cluster import normalized_mutual_info_score +from sklearn.metrics.cluster import v_measure_score +from sklearn.metrics.cluster._supervised import _generalized_average +from sklearn.metrics.cluster._supervised import check_clusterings + from sklearn.utils import assert_all_finite from sklearn.utils._testing import assert_almost_equal +from numpy.testing import assert_array_equal, assert_array_almost_equal, assert_allclose + score_funcs = [ adjusted_rand_score, diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 9fde388fcda5a..22dd1a1bf1557 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -1,19 +1,17 @@ import warnings import numpy as np -import pytest import scipy.sparse as sp +import pytest from scipy.sparse import csr_matrix from sklearn import datasets -from sklearn.metrics import pairwise_distances -from sklearn.metrics.cluster import ( - calinski_harabasz_score, - davies_bouldin_score, - silhouette_samples, - silhouette_score, -) from sklearn.utils._testing import assert_array_equal +from sklearn.metrics.cluster import silhouette_score +from sklearn.metrics.cluster import silhouette_samples +from sklearn.metrics import pairwise_distances +from sklearn.metrics.cluster import calinski_harabasz_score +from sklearn.metrics.cluster import davies_bouldin_score def test_silhouette(): diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 84446705d827b..33b2a9901902b 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -8,31 +8,31 @@ # License: BSD 3 clause import itertools -import warnings from functools import partial +import warnings import numpy as np -from scipy.sparse import csr_matrix, issparse from scipy.spatial import distance - +from scipy.sparse import csr_matrix +from scipy.sparse import issparse from joblib import Parallel, effective_n_jobs from .. 
import config_context -from ..exceptions import DataConversionWarning +from ..utils.validation import _num_samples +from ..utils.validation import check_non_negative +from ..utils import check_array +from ..utils import gen_even_slices +from ..utils import gen_batches, get_chunk_n_rows +from ..utils import is_scalar_nan +from ..utils.extmath import row_norms, safe_sparse_dot from ..preprocessing import normalize -from ..utils import ( - check_array, - gen_batches, - gen_even_slices, - get_chunk_n_rows, - is_scalar_nan, -) from ..utils._mask import _get_mask -from ..utils.extmath import row_norms, safe_sparse_dot -from ..utils.fixes import delayed, parse_version, sp_version -from ..utils.validation import _num_samples, check_non_negative +from ..utils.fixes import delayed +from ..utils.fixes import sp_version, parse_version + from ._pairwise_distances_reduction import PairwiseDistancesArgKmin from ._pairwise_fast import _chi2_kernel_fast, _sparse_manhattan +from ..exceptions import DataConversionWarning # Utility Functions diff --git a/sklearn/metrics/setup.py b/sklearn/metrics/setup.py index d2d40afd2d889..736ba6d7d4424 100644 --- a/sklearn/metrics/setup.py +++ b/sklearn/metrics/setup.py @@ -1,6 +1,6 @@ import os - import numpy as np + from numpy.distutils.misc_util import Configuration diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 6a977dc1e3b35..25c2dcda55d9c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1,49 +1,52 @@ -import re -import warnings from functools import partial -from itertools import chain, permutations, product +from itertools import product +from itertools import chain +from itertools import permutations +import warnings +import re import numpy as np -import pytest from scipy import linalg -from scipy.spatial.distance import hamming as sp_hamming +import pytest + +from sklearn import datasets +from sklearn import svm -from sklearn import datasets, svm from sklearn.datasets import make_multilabel_classification -from sklearn.exceptions import UndefinedMetricWarning -from sklearn.metrics import ( - accuracy_score, - average_precision_score, - balanced_accuracy_score, - brier_score_loss, - classification_report, - cohen_kappa_score, - confusion_matrix, - f1_score, - fbeta_score, - hamming_loss, - hinge_loss, - jaccard_score, - log_loss, - matthews_corrcoef, - multilabel_confusion_matrix, - precision_recall_fscore_support, - precision_score, - recall_score, - zero_one_loss, -) -from sklearn.metrics._classification import _check_targets -from sklearn.preprocessing import LabelBinarizer, label_binarize -from sklearn.utils._mocking import MockDataFrame -from sklearn.utils._testing import ( - assert_allclose, - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, - assert_no_warnings, - ignore_warnings, -) +from sklearn.preprocessing import label_binarize, LabelBinarizer from sklearn.utils.validation import check_random_state +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_no_warnings +from sklearn.utils._testing import ignore_warnings +from sklearn.utils._mocking import MockDataFrame + +from sklearn.metrics import accuracy_score +from sklearn.metrics import average_precision_score +from sklearn.metrics import 
balanced_accuracy_score +from sklearn.metrics import classification_report +from sklearn.metrics import cohen_kappa_score +from sklearn.metrics import confusion_matrix +from sklearn.metrics import f1_score +from sklearn.metrics import fbeta_score +from sklearn.metrics import hamming_loss +from sklearn.metrics import hinge_loss +from sklearn.metrics import jaccard_score +from sklearn.metrics import log_loss +from sklearn.metrics import matthews_corrcoef +from sklearn.metrics import precision_recall_fscore_support +from sklearn.metrics import precision_score +from sklearn.metrics import recall_score +from sklearn.metrics import zero_one_loss +from sklearn.metrics import brier_score_loss +from sklearn.metrics import multilabel_confusion_matrix + +from sklearn.metrics._classification import _check_targets +from sklearn.exceptions import UndefinedMetricWarning + +from scipy.spatial.distance import hamming as sp_hamming ############################################################################### # Utilities for testing @@ -2435,7 +2438,7 @@ def test_log_loss_pandas_input(): y_pr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]]) types = [(MockDataFrame, MockDataFrame)] try: - from pandas import DataFrame, Series + from pandas import Series, DataFrame types.append((Series, DataFrame)) except ImportError: diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 8bff90e00a1a9..1e627f9f86676 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1,67 +1,71 @@ from functools import partial from inspect import signature -from itertools import chain, permutations, product +from itertools import product +from itertools import chain +from itertools import permutations import numpy as np -import pytest import scipy.sparse as sp +import pytest + from sklearn.datasets import make_multilabel_classification -from sklearn.metrics import ( - accuracy_score, - average_precision_score, - balanced_accuracy_score, - brier_score_loss, - cohen_kappa_score, - confusion_matrix, - coverage_error, - d2_absolute_error_score, - d2_pinball_score, - d2_tweedie_score, - dcg_score, - det_curve, - explained_variance_score, - f1_score, - fbeta_score, - hamming_loss, - hinge_loss, - jaccard_score, - label_ranking_average_precision_score, - label_ranking_loss, - log_loss, - matthews_corrcoef, - max_error, - mean_absolute_error, - mean_absolute_percentage_error, - mean_gamma_deviance, - mean_pinball_loss, - mean_poisson_deviance, - mean_squared_error, - mean_tweedie_deviance, - median_absolute_error, - multilabel_confusion_matrix, - ndcg_score, - precision_recall_curve, - precision_score, - r2_score, - recall_score, - roc_auc_score, - roc_curve, - top_k_accuracy_score, - zero_one_loss, -) -from sklearn.metrics._base import _average_binary_score from sklearn.preprocessing import LabelBinarizer -from sklearn.utils import shuffle -from sklearn.utils._testing import ( - assert_allclose, - assert_almost_equal, - assert_array_equal, - assert_array_less, - ignore_warnings, -) from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import _num_samples, check_random_state +from sklearn.utils.validation import _num_samples +from sklearn.utils.validation import check_random_state +from sklearn.utils import shuffle + +from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import 
assert_array_less +from sklearn.utils._testing import ignore_warnings + +from sklearn.metrics import accuracy_score +from sklearn.metrics import average_precision_score +from sklearn.metrics import balanced_accuracy_score +from sklearn.metrics import brier_score_loss +from sklearn.metrics import cohen_kappa_score +from sklearn.metrics import confusion_matrix +from sklearn.metrics import coverage_error +from sklearn.metrics import d2_tweedie_score +from sklearn.metrics import d2_pinball_score +from sklearn.metrics import d2_absolute_error_score +from sklearn.metrics import det_curve +from sklearn.metrics import explained_variance_score +from sklearn.metrics import f1_score +from sklearn.metrics import fbeta_score +from sklearn.metrics import hamming_loss +from sklearn.metrics import hinge_loss +from sklearn.metrics import jaccard_score +from sklearn.metrics import label_ranking_average_precision_score +from sklearn.metrics import label_ranking_loss +from sklearn.metrics import log_loss +from sklearn.metrics import max_error +from sklearn.metrics import matthews_corrcoef +from sklearn.metrics import mean_absolute_error +from sklearn.metrics import mean_absolute_percentage_error +from sklearn.metrics import mean_squared_error +from sklearn.metrics import mean_tweedie_deviance +from sklearn.metrics import mean_poisson_deviance +from sklearn.metrics import mean_gamma_deviance +from sklearn.metrics import median_absolute_error +from sklearn.metrics import multilabel_confusion_matrix +from sklearn.metrics import mean_pinball_loss +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import precision_score +from sklearn.metrics import r2_score +from sklearn.metrics import recall_score +from sklearn.metrics import roc_auc_score +from sklearn.metrics import roc_curve +from sklearn.metrics import zero_one_loss +from sklearn.metrics import ndcg_score +from sklearn.metrics import dcg_score +from sklearn.metrics import top_k_accuracy_score + +from sklearn.metrics._base import _average_binary_score + # Note toward developers about metric testing # ------------------------------------------- diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py index e74c669d47d6a..6c841d1d44f8c 100644 --- a/sklearn/metrics/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -1,18 +1,19 @@ -import copy import itertools import pickle +import copy import numpy as np +from numpy.testing import assert_array_almost_equal + import pytest + import scipy.sparse as sp -from numpy.testing import assert_array_almost_equal from scipy.spatial.distance import cdist - from sklearn.metrics import DistanceMetric from sklearn.metrics._dist_metrics import BOOL_METRICS from sklearn.utils import check_random_state from sklearn.utils._testing import create_memmap_backed_data -from sklearn.utils.fixes import parse_version, sp_version +from sklearn.utils.fixes import sp_version, parse_version def dist_func(x1, x2, p): diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index a331c2f05580d..f14c558d5a3c1 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -3,15 +3,10 @@ import numpy as np from numpy import linalg -from scipy.sparse import csr_matrix, dok_matrix, issparse -from scipy.spatial.distance import ( - cdist, - cityblock, - cosine, - minkowski, - pdist, - squareform, -) + +from scipy.sparse import dok_matrix, csr_matrix, issparse +from scipy.spatial.distance 
import cosine, cityblock, minkowski +from scipy.spatial.distance import cdist, pdist, squareform try: from scipy.spatial.distance import wminkowski @@ -20,49 +15,47 @@ # should be used instead. from scipy.spatial.distance import minkowski as wminkowski +from sklearn.utils.fixes import sp_version, parse_version + import pytest from sklearn import config_context -from sklearn.exceptions import DataConversionWarning -from sklearn.metrics.pairwise import ( - PAIRED_DISTANCES, - PAIRWISE_BOOLEAN_FUNCTIONS, - PAIRWISE_DISTANCE_FUNCTIONS, - PAIRWISE_KERNEL_FUNCTIONS, - _euclidean_distances_upcast, - additive_chi2_kernel, - check_paired_arrays, - check_pairwise_arrays, - chi2_kernel, - cosine_distances, - cosine_similarity, - euclidean_distances, - haversine_distances, - laplacian_kernel, - linear_kernel, - manhattan_distances, - nan_euclidean_distances, - paired_distances, - paired_euclidean_distances, - paired_manhattan_distances, - pairwise_distances, - pairwise_distances_argmin, - pairwise_distances_argmin_min, - pairwise_distances_chunked, - pairwise_kernels, - polynomial_kernel, - rbf_kernel, - sigmoid_kernel, -) + +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import ignore_warnings + +from sklearn.metrics.pairwise import euclidean_distances +from sklearn.metrics.pairwise import nan_euclidean_distances +from sklearn.metrics.pairwise import manhattan_distances +from sklearn.metrics.pairwise import haversine_distances +from sklearn.metrics.pairwise import linear_kernel +from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel +from sklearn.metrics.pairwise import polynomial_kernel +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.metrics.pairwise import laplacian_kernel +from sklearn.metrics.pairwise import sigmoid_kernel +from sklearn.metrics.pairwise import cosine_similarity +from sklearn.metrics.pairwise import cosine_distances +from sklearn.metrics.pairwise import pairwise_distances +from sklearn.metrics.pairwise import pairwise_distances_chunked +from sklearn.metrics.pairwise import pairwise_distances_argmin_min +from sklearn.metrics.pairwise import pairwise_distances_argmin +from sklearn.metrics.pairwise import pairwise_kernels +from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS +from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS +from sklearn.metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS +from sklearn.metrics.pairwise import PAIRED_DISTANCES +from sklearn.metrics.pairwise import check_pairwise_arrays +from sklearn.metrics.pairwise import check_paired_arrays +from sklearn.metrics.pairwise import paired_distances +from sklearn.metrics.pairwise import paired_euclidean_distances +from sklearn.metrics.pairwise import paired_manhattan_distances +from sklearn.metrics.pairwise import _euclidean_distances_upcast from sklearn.preprocessing import normalize -from sklearn.utils._testing import ( - assert_allclose, - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, - ignore_warnings, -) -from sklearn.utils.fixes import parse_version, sp_version +from sklearn.exceptions import DataConversionWarning def test_pairwise_distances(): diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py index 06aca2dd3c036..192f7ef43a6c6 100644 --- 
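The remaining test-module hunks below keep trading the same two layouts. The one-import-per-line style seen on the `+` side can also be produced mechanically; a minimal sketch using isort's `force_single_line` option, shown here only as an illustration of that style and not as a claim about this repository's configuration:

# Minimal sketch (not part of the patch): force_single_line=True splits a
# combined import into the one-per-line layout, sorted alphabetically.
import isort

combined = "from sklearn.utils.fixes import sp_version, parse_version\n"

print(isort.code(combined, force_single_line=True))
# from sklearn.utils.fixes import parse_version
# from sklearn.utils.fixes import sp_version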
diff --git a/sklearn/metrics/tests/test_pairwise_distances_reduction.py b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
index 06aca2dd3c036..192f7ef43a6c6 100644
--- a/sklearn/metrics/tests/test_pairwise_distances_reduction.py
+++ b/sklearn/metrics/tests/test_pairwise_distances_reduction.py
@@ -1,18 +1,19 @@
 import numpy as np
 import pytest
 import threadpoolctl
-from numpy.testing import assert_allclose, assert_array_equal
+from numpy.testing import assert_array_equal, assert_allclose
 from scipy.sparse import csr_matrix
 from scipy.spatial.distance import cdist
 
-from sklearn.metrics import euclidean_distances
 from sklearn.metrics._pairwise_distances_reduction import (
+    PairwiseDistancesReduction,
     PairwiseDistancesArgKmin,
     PairwiseDistancesRadiusNeighborhood,
-    PairwiseDistancesReduction,
     _sqeuclidean_row_norms,
 )
-from sklearn.utils.fixes import parse_version, sp_version
+
+from sklearn.metrics import euclidean_distances
+from sklearn.utils.fixes import sp_version, parse_version
 
 # Common supported metric between scipy.spatial.distance.cdist
 # and PairwiseDistancesReduction.
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
index 9315cc8d29a4c..7d2338337b83d 100644
--- a/sklearn/metrics/tests/test_ranking.py
+++ b/sklearn/metrics/tests/test_ranking.py
@@ -1,44 +1,41 @@
 import re
-import warnings
-
-import numpy as np
 import pytest
+import numpy as np
+import warnings
 from scipy.sparse import csr_matrix
 
-from sklearn import datasets, svm
+from sklearn import datasets
+from sklearn import svm
+
+from sklearn.utils.extmath import softmax
 from sklearn.datasets import make_multilabel_classification
+from sklearn.random_projection import _sparse_random_matrix
+from sklearn.utils.validation import check_array, check_consistent_length
+from sklearn.utils.validation import check_random_state
+
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import auc
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import coverage_error
+from sklearn.metrics import det_curve
+from sklearn.metrics import label_ranking_average_precision_score
+from sklearn.metrics import precision_recall_curve
+from sklearn.metrics import label_ranking_loss
+from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_curve
+from sklearn.metrics._ranking import _ndcg_sample_scores, _dcg_sample_scores
+from sklearn.metrics import ndcg_score, dcg_score
+from sklearn.metrics import top_k_accuracy_score
+
 from sklearn.exceptions import UndefinedMetricWarning
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import (
-    accuracy_score,
-    auc,
-    average_precision_score,
-    coverage_error,
-    dcg_score,
-    det_curve,
-    label_ranking_average_precision_score,
-    label_ranking_loss,
-    ndcg_score,
-    precision_recall_curve,
-    roc_auc_score,
-    roc_curve,
-    top_k_accuracy_score,
-)
-from sklearn.metrics._ranking import _dcg_sample_scores, _ndcg_sample_scores
 from sklearn.model_selection import train_test_split
-from sklearn.random_projection import _sparse_random_matrix
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
-from sklearn.utils.extmath import softmax
-from sklearn.utils.validation import (
-    check_array,
-    check_consistent_length,
-    check_random_state,
-)
+from sklearn.linear_model import LogisticRegression
+
 
 ###############################################################################
 # Utilities for testing
diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py
index f7265aab2e282..090bc64bf0fe4 100644
--- a/sklearn/metrics/tests/test_regression.py
+++ b/sklearn/metrics/tests/test_regression.py
@@ -1,36 +1,34 @@
-from itertools import product
-
 import numpy as np
-import pytest
-from numpy.testing import assert_allclose
 from scipy import optimize
+from numpy.testing import assert_allclose
 from scipy.special import factorial, xlogy
+from itertools import product
+import pytest
 
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.dummy import DummyRegressor
-from sklearn.exceptions import UndefinedMetricWarning
-from sklearn.metrics import (
-    d2_absolute_error_score,
-    d2_pinball_score,
-    d2_tweedie_score,
-    explained_variance_score,
-    make_scorer,
-    max_error,
-    mean_absolute_error,
-    mean_absolute_percentage_error,
-    mean_pinball_loss,
-    mean_squared_error,
-    mean_squared_log_error,
-    mean_tweedie_deviance,
-    median_absolute_error,
-    r2_score,
-)
-from sklearn.metrics._regression import _check_reg_targets
 from sklearn.model_selection import GridSearchCV
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+
+from sklearn.metrics import explained_variance_score
+from sklearn.metrics import mean_absolute_error
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import mean_squared_log_error
+from sklearn.metrics import median_absolute_error
+from sklearn.metrics import mean_absolute_percentage_error
+from sklearn.metrics import max_error
+from sklearn.metrics import mean_pinball_loss
+from sklearn.metrics import r2_score
+from sklearn.metrics import mean_tweedie_deviance
+from sklearn.metrics import d2_tweedie_score
+from sklearn.metrics import d2_pinball_score
+from sklearn.metrics import d2_absolute_error_score
+from sklearn.metrics import make_scorer
+
+from sklearn.metrics._regression import _check_reg_targets
+
+from sklearn.exceptions import UndefinedMetricWarning
 
 
 def test_regression_metrics(n_samples=50):
diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py
index fec3c6469fbca..23680e48ae3e7 100644
--- a/sklearn/metrics/tests/test_score_objects.py
+++ b/sklearn/metrics/tests/test_score_objects.py
@@ -1,68 +1,61 @@
-import numbers
-import os
+from copy import deepcopy
 import pickle
-import shutil
 import tempfile
-from copy import deepcopy
-from functools import partial
+import shutil
+import os
+import numbers
 from unittest.mock import Mock
+from functools import partial
 
 import numpy as np
 import pytest
+import joblib
+
 from numpy.testing import assert_allclose
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import ignore_warnings
 
-import joblib
 from sklearn.base import BaseEstimator
-from sklearn.cluster import KMeans
-from sklearn.datasets import (
-    load_diabetes,
-    make_blobs,
-    make_classification,
-    make_multilabel_classification,
-    make_regression,
-)
-from sklearn.linear_model import LogisticRegression, Perceptron, Ridge
 from sklearn.metrics import (
-    SCORERS,
     accuracy_score,
-    average_precision_score,
     balanced_accuracy_score,
+    average_precision_score,
     brier_score_loss,
-    check_scoring,
-)
-from sklearn.metrics import cluster as cluster_module
-from sklearn.metrics import (
     f1_score,
     fbeta_score,
-    get_scorer,
-    get_scorer_names,
     jaccard_score,
     log_loss,
-    make_scorer,
-    matthews_corrcoef,
    precision_score,
     r2_score,
     recall_score,
     roc_auc_score,
     top_k_accuracy_score,
+    matthews_corrcoef,
 )
+from sklearn.metrics import cluster as cluster_module
+from sklearn.metrics import check_scoring
 from sklearn.metrics._scorer import (
-    _check_multimetric_scoring,
-    _MultimetricScorer,
-    _passthrough_scorer,
     _PredictScorer,
+    _passthrough_scorer,
+    _MultimetricScorer,
+    _check_multimetric_scoring,
 )
-from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
-from sklearn.multiclass import OneVsRestClassifier
+from sklearn.metrics import make_scorer, get_scorer, SCORERS, get_scorer_names
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.pipeline import make_pipeline
 from sklearn.svm import LinearSVC
+from sklearn.pipeline import make_pipeline
+from sklearn.cluster import KMeans
+from sklearn.linear_model import Ridge, LogisticRegression, Perceptron
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
+from sklearn.datasets import make_blobs
+from sklearn.datasets import make_classification, make_regression
+from sklearn.datasets import make_multilabel_classification
+from sklearn.datasets import load_diabetes
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn.model_selection import GridSearchCV
+from sklearn.multiclass import OneVsRestClassifier
+
 
 REGRESSION_SCORERS = [
     "explained_variance",
diff --git a/sklearn/mixture/__init__.py b/sklearn/mixture/__init__.py
index f0018196ffc98..c5c20aa38eb18 100644
--- a/sklearn/mixture/__init__.py
+++ b/sklearn/mixture/__init__.py
@@ -2,7 +2,8 @@
 The :mod:`sklearn.mixture` module implements mixture modeling algorithms.
 """
 
-from ._bayesian_mixture import BayesianGaussianMixture
 from ._gaussian_mixture import GaussianMixture
+from ._bayesian_mixture import BayesianGaussianMixture
+
 
 __all__ = ["GaussianMixture", "BayesianGaussianMixture"]
diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py
index 15d5b01de356e..2edc3b57aa4d2 100644
--- a/sklearn/mixture/_base.py
+++ b/sklearn/mixture/_base.py
@@ -13,8 +13,9 @@
 from scipy.special import logsumexp
 
 from .. import cluster
-from ..base import BaseEstimator, DensityMixin
 from ..cluster import kmeans_plusplus
+from ..base import BaseEstimator
+from ..base import DensityMixin
 from ..exceptions import ConvergenceWarning
 from ..utils import check_random_state, check_scalar
 from ..utils.validation import check_is_fitted
diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py
index 617a8f944c9f0..704c191638863 100644
--- a/sklearn/mixture/_bayesian_mixture.py
+++ b/sklearn/mixture/_bayesian_mixture.py
@@ -4,20 +4,17 @@
 # License: BSD 3 clause
 
 import math
-
 import numpy as np
 from scipy.special import betaln, digamma, gammaln
 
-from ..utils import check_array
 from ._base import BaseMixture, _check_shape
-from ._gaussian_mixture import (
-    _check_precision_matrix,
-    _check_precision_positivity,
-    _compute_log_det_cholesky,
-    _compute_precision_cholesky,
-    _estimate_gaussian_parameters,
-    _estimate_log_gaussian_prob,
-)
+from ._gaussian_mixture import _check_precision_matrix
+from ._gaussian_mixture import _check_precision_positivity
+from ._gaussian_mixture import _compute_log_det_cholesky
+from ._gaussian_mixture import _compute_precision_cholesky
+from ._gaussian_mixture import _estimate_gaussian_parameters
+from ._gaussian_mixture import _estimate_log_gaussian_prob
+from ..utils import check_array
 
 
 def _log_dirichlet_norm(dirichlet_concentration):
diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py
index 73a36e0e5e242..66a87c6e9c136 100644
--- a/sklearn/mixture/_gaussian_mixture.py
+++ b/sklearn/mixture/_gaussian_mixture.py
@@ -5,11 +5,13 @@
 # License: BSD 3 clause
 
 import numpy as np
+
 from scipy import linalg
 
+from ._base import BaseMixture, _check_shape
 from ..utils import check_array
 from ..utils.extmath import row_norms
-from ._base import BaseMixture, _check_shape
+
 
 ###############################################################################
 # Gaussian mixture shape checkers used by the GaussianMixture class
diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py
index 0600594550191..2cd54aef5b943 100644
--- a/sklearn/mixture/tests/test_bayesian_mixture.py
+++ b/sklearn/mixture/tests/test_bayesian_mixture.py
@@ -5,19 +5,23 @@
 import re
 
 import numpy as np
-import pytest
 from scipy.special import gammaln
+import pytest
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
 
-from sklearn.exceptions import ConvergenceWarning, NotFittedError
 from sklearn.metrics.cluster import adjusted_rand_score
+
+from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm
+from sklearn.mixture._bayesian_mixture import _log_wishart_norm
+
 from sklearn.mixture import BayesianGaussianMixture
-from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm
+
 from sklearn.mixture.tests.test_gaussian_mixture import RandomData
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
+from sklearn.exceptions import ConvergenceWarning, NotFittedError
+from sklearn.utils._testing import ignore_warnings
+
 
 COVARIANCE_TYPE = ["full", "tied", "diag", "spherical"]
 PRIOR_TYPE = ["dirichlet_process", "dirichlet_distribution"]
diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py
index f1e9e29c7efb4..e9a19c2239f8a 100644
--- a/sklearn/mixture/tests/test_gaussian_mixture.py
+++ b/sklearn/mixture/tests/test_gaussian_mixture.py
@@ -2,40 +2,39 @@ # Thierry Guillemot # License: BSD 3 clause -import copy import itertools import re import sys +import copy import warnings -from io import StringIO +import pytest import numpy as np -import pytest -from scipy import linalg, stats +from scipy import stats, linalg from sklearn.cluster import KMeans from sklearn.covariance import EmpiricalCovariance from sklearn.datasets import make_spd_matrix -from sklearn.exceptions import ConvergenceWarning, NotFittedError +from io import StringIO from sklearn.metrics.cluster import adjusted_rand_score from sklearn.mixture import GaussianMixture from sklearn.mixture._gaussian_mixture import ( - _compute_log_det_cholesky, - _compute_precision_cholesky, - _estimate_gaussian_covariances_diag, _estimate_gaussian_covariances_full, - _estimate_gaussian_covariances_spherical, _estimate_gaussian_covariances_tied, + _estimate_gaussian_covariances_diag, + _estimate_gaussian_covariances_spherical, _estimate_gaussian_parameters, + _compute_precision_cholesky, + _compute_log_det_cholesky, ) -from sklearn.utils._testing import ( - assert_allclose, - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, - ignore_warnings, -) +from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.utils.extmath import fast_logdet +from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import ignore_warnings + COVARIANCE_TYPE = ["full", "tied", "diag", "spherical"] diff --git a/sklearn/mixture/tests/test_mixture.py b/sklearn/mixture/tests/test_mixture.py index f0ea3494f0e7d..eeb71d0f89407 100644 --- a/sklearn/mixture/tests/test_mixture.py +++ b/sklearn/mixture/tests/test_mixture.py @@ -1,10 +1,11 @@ # Author: Guillaume Lemaitre # License: BSD 3 clause -import numpy as np import pytest +import numpy as np -from sklearn.mixture import BayesianGaussianMixture, GaussianMixture +from sklearn.mixture import GaussianMixture +from sklearn.mixture import BayesianGaussianMixture @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()]) diff --git a/sklearn/model_selection/__init__.py b/sklearn/model_selection/__init__.py index 161b66a68edfc..a481f5db72fdf 100644 --- a/sklearn/model_selection/__init__.py +++ b/sklearn/model_selection/__init__.py @@ -1,35 +1,36 @@ import typing -from ._search import GridSearchCV, ParameterGrid, ParameterSampler, RandomizedSearchCV -from ._split import ( - BaseCrossValidator, - BaseShuffleSplit, - GroupKFold, - GroupShuffleSplit, - KFold, - LeaveOneGroupOut, - LeaveOneOut, - LeavePGroupsOut, - LeavePOut, - PredefinedSplit, - RepeatedKFold, - RepeatedStratifiedKFold, - ShuffleSplit, - StratifiedGroupKFold, - StratifiedKFold, - StratifiedShuffleSplit, - TimeSeriesSplit, - check_cv, - train_test_split, -) -from ._validation import ( - cross_val_predict, - cross_val_score, - cross_validate, - learning_curve, - permutation_test_score, - validation_curve, -) +from ._split import BaseCrossValidator +from ._split import BaseShuffleSplit +from ._split import KFold +from ._split import GroupKFold +from ._split import StratifiedKFold +from ._split import TimeSeriesSplit +from ._split import LeaveOneGroupOut +from ._split import LeaveOneOut +from ._split import LeavePGroupsOut +from ._split import LeavePOut +from ._split import RepeatedKFold +from ._split import RepeatedStratifiedKFold +from ._split 
import ShuffleSplit +from ._split import GroupShuffleSplit +from ._split import StratifiedShuffleSplit +from ._split import StratifiedGroupKFold +from ._split import PredefinedSplit +from ._split import train_test_split +from ._split import check_cv + +from ._validation import cross_val_score +from ._validation import cross_val_predict +from ._validation import cross_validate +from ._validation import learning_curve +from ._validation import permutation_test_score +from ._validation import validation_curve + +from ._search import GridSearchCV +from ._search import RandomizedSearchCV +from ._search import ParameterGrid +from ._search import ParameterSampler if typing.TYPE_CHECKING: # Avoid errors in type checkers (e.g. mypy) for experimental estimators. diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index f3ab5c40f82fe..5ceb71569b932 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -10,40 +10,38 @@ # Raghav RV # License: BSD 3 clause -import numbers -import operator -import time -import warnings from abc import ABCMeta, abstractmethod from collections import defaultdict -from collections.abc import Iterable, Mapping, Sequence +from collections.abc import Mapping, Sequence, Iterable from functools import partial, reduce from itertools import product +import numbers +import operator +import time +import warnings import numpy as np from numpy.ma import MaskedArray from scipy.stats import rankdata -from joblib import Parallel - -from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier +from ..base import BaseEstimator, is_classifier, clone +from ..base import MetaEstimatorMixin +from ._split import check_cv +from ._validation import _fit_and_score +from ._validation import _aggregate_score_dicts +from ._validation import _insert_error_scores +from ._validation import _normalize_score_results +from ._validation import _warn_or_raise_about_fit_failures from ..exceptions import NotFittedError -from ..metrics import check_scoring -from ..metrics._scorer import _check_multimetric_scoring +from joblib import Parallel from ..utils import check_random_state +from ..utils.random import sample_without_replacement from ..utils._tags import _safe_tags -from ..utils.fixes import delayed +from ..utils.validation import indexable, check_is_fitted, _check_fit_params from ..utils.metaestimators import available_if -from ..utils.random import sample_without_replacement -from ..utils.validation import _check_fit_params, check_is_fitted, indexable -from ._split import check_cv -from ._validation import ( - _aggregate_score_dicts, - _fit_and_score, - _insert_error_scores, - _normalize_score_results, - _warn_or_raise_about_fit_failures, -) +from ..utils.fixes import delayed +from ..metrics._scorer import _check_multimetric_scoring +from ..metrics import check_scoring __all__ = ["GridSearchCV", "ParameterGrid", "ParameterSampler", "RandomizedSearchCV"] diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 62aa157c9ace5..940c4c93831f5 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -1,17 +1,17 @@ -from abc import abstractmethod from copy import deepcopy from math import ceil, floor, log +from abc import abstractmethod from numbers import Integral import numpy as np - +from ._search import BaseSearchCV +from . 
import ParameterGrid, ParameterSampler from ..base import is_classifier +from ._split import check_cv, _yields_constant_splits from ..utils import resample from ..utils.multiclass import check_classification_targets from ..utils.validation import _num_samples -from . import ParameterGrid, ParameterSampler -from ._search import BaseSearchCV -from ._split import _yields_constant_splits, check_cv + __all__ = ["HalvingGridSearchCV", "HalvingRandomSearchCV"] diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 08ca4de3c7457..d2a0b5e1fc329 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -11,22 +11,24 @@ # Rodion Martynov # License: BSD 3 clause -import numbers -import warnings -from abc import ABCMeta, abstractmethod -from collections import defaultdict from collections.abc import Iterable -from inspect import signature +from collections import defaultdict +import warnings from itertools import chain, combinations from math import ceil, floor +import numbers +from abc import ABCMeta, abstractmethod +from inspect import signature import numpy as np from scipy.special import comb -from ..base import _pprint -from ..utils import _approximate_mode, _safe_indexing, check_random_state, indexable +from ..utils import indexable, check_random_state, _safe_indexing +from ..utils import _approximate_mode +from ..utils.validation import _num_samples, column_or_1d +from ..utils.validation import check_array from ..utils.multiclass import type_of_target -from ..utils.validation import _num_samples, check_array, column_or_1d +from ..base import _pprint __all__ = [ "BaseCrossValidator", diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 3c03cdfc44873..d83fca63da48c 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -11,29 +11,30 @@ # License: BSD 3 clause +import warnings import numbers import time -import warnings -from collections import Counter -from contextlib import suppress from functools import partial from traceback import format_exc +from contextlib import suppress +from collections import Counter import numpy as np import scipy.sparse as sp - from joblib import Parallel, logger -from ..base import clone, is_classifier -from ..exceptions import FitFailedWarning -from ..metrics import check_scoring -from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer -from ..preprocessing import LabelEncoder -from ..utils import _safe_indexing, check_random_state, indexable +from ..base import is_classifier, clone +from ..utils import indexable, check_random_state, _safe_indexing +from ..utils.validation import _check_fit_params +from ..utils.validation import _num_samples from ..utils.fixes import delayed from ..utils.metaestimators import _safe_split -from ..utils.validation import _check_fit_params, _num_samples +from ..metrics import check_scoring +from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer +from ..exceptions import FitFailedWarning from ._split import check_cv +from ..preprocessing import LabelEncoder + __all__ = [ "cross_validate", diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index c109f2915aa10..e1466d69d3902 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -1,71 +1,75 @@ """Test the search module""" -import pickle -import re -import sys from collections.abc 
import Iterable, Sized -from functools import partial from io import StringIO from itertools import chain, product +from functools import partial +import pickle +import sys from types import GeneratorType +import re import numpy as np -import pytest import scipy.sparse as sp -from scipy.stats import bernoulli, expon, uniform +import pytest -from sklearn.base import BaseEstimator, ClassifierMixin, is_classifier -from sklearn.cluster import KMeans -from sklearn.datasets import ( - make_blobs, - make_classification, - make_multilabel_classification, -) -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.impute import SimpleImputer -from sklearn.linear_model import LinearRegression, Ridge, SGDClassifier -from sklearn.metrics import ( - accuracy_score, - confusion_matrix, - f1_score, - make_scorer, - r2_score, - recall_score, - roc_auc_score, -) -from sklearn.metrics.pairwise import euclidean_distances -from sklearn.model_selection import ( - GridSearchCV, - GroupKFold, - GroupShuffleSplit, - KFold, - LeaveOneGroupOut, - LeavePGroupsOut, - ParameterGrid, - ParameterSampler, - RandomizedSearchCV, - StratifiedKFold, - StratifiedShuffleSplit, - train_test_split, -) -from sklearn.model_selection._search import BaseSearchCV -from sklearn.model_selection._validation import FitFailedWarning -from sklearn.model_selection.tests.common import OneTimeSplitter -from sklearn.neighbors import KernelDensity, KNeighborsClassifier, LocalOutlierFactor -from sklearn.pipeline import Pipeline -from sklearn.svm import SVC, LinearSVC -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.utils._mocking import CheckingClassifier, MockDataFrame from sklearn.utils._testing import ( - MinimalClassifier, - MinimalRegressor, - MinimalTransformer, + assert_array_equal, + assert_array_almost_equal, assert_allclose, assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, ignore_warnings, + MinimalClassifier, + MinimalRegressor, + MinimalTransformer, ) +from sklearn.utils._mocking import CheckingClassifier, MockDataFrame + +from scipy.stats import bernoulli, expon, uniform + +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.base import is_classifier +from sklearn.datasets import make_classification +from sklearn.datasets import make_blobs +from sklearn.datasets import make_multilabel_classification + +from sklearn.model_selection import train_test_split +from sklearn.model_selection import KFold +from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import StratifiedShuffleSplit +from sklearn.model_selection import LeaveOneGroupOut +from sklearn.model_selection import LeavePGroupsOut +from sklearn.model_selection import GroupKFold +from sklearn.model_selection import GroupShuffleSplit +from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import RandomizedSearchCV +from sklearn.model_selection import ParameterGrid +from sklearn.model_selection import ParameterSampler +from sklearn.model_selection._search import BaseSearchCV + +from sklearn.model_selection._validation import FitFailedWarning + +from sklearn.svm import LinearSVC, SVC +from sklearn.tree import DecisionTreeRegressor +from sklearn.tree import DecisionTreeClassifier +from sklearn.cluster import KMeans +from sklearn.neighbors import KernelDensity +from sklearn.neighbors import LocalOutlierFactor +from sklearn.neighbors import KNeighborsClassifier +from sklearn.metrics import f1_score +from sklearn.metrics import recall_score 
+from sklearn.metrics import accuracy_score +from sklearn.metrics import make_scorer +from sklearn.metrics import roc_auc_score +from sklearn.metrics import confusion_matrix +from sklearn.metrics import r2_score +from sklearn.metrics.pairwise import euclidean_distances +from sklearn.impute import SimpleImputer +from sklearn.pipeline import Pipeline +from sklearn.linear_model import Ridge, SGDClassifier, LinearRegression +from sklearn.ensemble import HistGradientBoostingClassifier + +from sklearn.model_selection.tests.common import OneTimeSplitter # Neither of the following two estimators inherit from BaseEstimator, @@ -781,7 +785,7 @@ def test_pandas_input(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import DataFrame, Series + from pandas import Series, DataFrame types.append((DataFrame, Series)) except ImportError: diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 58abeb148fdfc..f502ebc8a3b6a 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -1,51 +1,52 @@ """Test the split module""" -import re import warnings -from itertools import combinations, combinations_with_replacement, permutations - -import numpy as np import pytest -from scipy import stats +import re +import numpy as np from scipy.sparse import coo_matrix, csc_matrix, csr_matrix +from scipy import stats from scipy.special import comb +from itertools import combinations +from itertools import combinations_with_replacement +from itertools import permutations + +from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import ignore_warnings +from sklearn.utils.validation import _num_samples +from sklearn.utils._mocking import MockDataFrame + +from sklearn.model_selection import cross_val_score +from sklearn.model_selection import KFold +from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import GroupKFold +from sklearn.model_selection import TimeSeriesSplit +from sklearn.model_selection import LeaveOneOut +from sklearn.model_selection import LeaveOneGroupOut +from sklearn.model_selection import LeavePOut +from sklearn.model_selection import LeavePGroupsOut +from sklearn.model_selection import ShuffleSplit +from sklearn.model_selection import GroupShuffleSplit +from sklearn.model_selection import StratifiedShuffleSplit +from sklearn.model_selection import PredefinedSplit +from sklearn.model_selection import check_cv +from sklearn.model_selection import train_test_split +from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import RepeatedKFold +from sklearn.model_selection import RepeatedStratifiedKFold +from sklearn.model_selection import StratifiedGroupKFold -from sklearn.datasets import load_digits, make_classification from sklearn.dummy import DummyClassifier -from sklearn.model_selection import ( - GridSearchCV, - GroupKFold, - GroupShuffleSplit, - KFold, - LeaveOneGroupOut, - LeaveOneOut, - LeavePGroupsOut, - LeavePOut, - PredefinedSplit, - RepeatedKFold, - RepeatedStratifiedKFold, - ShuffleSplit, - StratifiedGroupKFold, - StratifiedKFold, - StratifiedShuffleSplit, - TimeSeriesSplit, - check_cv, - cross_val_score, - train_test_split, -) -from sklearn.model_selection._split import ( - _build_repr, - _validate_shuffle_split, - 
_yields_constant_splits, -) + +from sklearn.model_selection._split import _validate_shuffle_split +from sklearn.model_selection._split import _build_repr +from sklearn.model_selection._split import _yields_constant_splits + +from sklearn.datasets import load_digits +from sklearn.datasets import make_classification + from sklearn.svm import SVC -from sklearn.utils._mocking import MockDataFrame -from sklearn.utils._testing import ( - assert_allclose, - assert_array_almost_equal, - assert_array_equal, - ignore_warnings, -) -from sklearn.utils.validation import _num_samples X = np.ones(10) y = np.arange(10) // 2 diff --git a/sklearn/model_selection/tests/test_successive_halving.py b/sklearn/model_selection/tests/test_successive_halving.py index 93502b403fbcf..fe06957f5deed 100644 --- a/sklearn/model_selection/tests/test_successive_halving.py +++ b/sklearn/model_selection/tests/test_successive_halving.py @@ -1,29 +1,26 @@ from math import ceil -import numpy as np import pytest from scipy.stats import norm, randint +import numpy as np from sklearn.datasets import make_classification from sklearn.dummy import DummyClassifier from sklearn.experimental import enable_halving_search_cv # noqa -from sklearn.model_selection import ( - GroupKFold, - GroupShuffleSplit, - HalvingGridSearchCV, - HalvingRandomSearchCV, - KFold, - LeaveOneGroupOut, - LeavePGroupsOut, - ShuffleSplit, - StratifiedKFold, - StratifiedShuffleSplit, -) +from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import StratifiedShuffleSplit +from sklearn.model_selection import LeaveOneGroupOut +from sklearn.model_selection import LeavePGroupsOut +from sklearn.model_selection import GroupKFold +from sklearn.model_selection import GroupShuffleSplit +from sklearn.model_selection import HalvingGridSearchCV +from sklearn.model_selection import HalvingRandomSearchCV +from sklearn.model_selection import KFold, ShuffleSplit +from sklearn.svm import LinearSVC from sklearn.model_selection._search_successive_halving import ( _SubsampleMetaSplitter, _top_k, ) -from sklearn.svm import LinearSVC class FastClassifier(DummyClassifier): diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 5fedf3b802876..90b5a605ac2e4 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -5,83 +5,78 @@ import tempfile import warnings from functools import partial -from io import StringIO from time import sleep -import numpy as np import pytest +import numpy as np from scipy.sparse import coo_matrix, csr_matrix - -from sklearn.base import BaseEstimator, clone -from sklearn.cluster import KMeans -from sklearn.datasets import ( - load_diabetes, - load_digits, - load_iris, - make_classification, - make_multilabel_classification, - make_regression, -) -from sklearn.ensemble import RandomForestClassifier from sklearn.exceptions import FitFailedWarning -from sklearn.impute import SimpleImputer -from sklearn.linear_model import ( - LogisticRegression, - PassiveAggressiveClassifier, - Ridge, - RidgeClassifier, - SGDClassifier, -) -from sklearn.metrics import ( - accuracy_score, - check_scoring, - confusion_matrix, - explained_variance_score, - make_scorer, - mean_squared_error, - precision_recall_fscore_support, - precision_score, - r2_score, -) -from sklearn.model_selection import ( - GridSearchCV, - GroupKFold, - GroupShuffleSplit, - KFold, - LeaveOneGroupOut, - LeaveOneOut, - LeavePGroupsOut, - ShuffleSplit, - 
StratifiedKFold, - cross_val_predict, - cross_val_score, - cross_validate, - learning_curve, - permutation_test_score, - validation_curve, -) -from sklearn.model_selection._validation import ( - _check_is_permutation, - _fit_and_score, - _score, -) -from sklearn.model_selection.tests.common import OneTimeSplitter + from sklearn.model_selection.tests.test_search import FailingClassifier -from sklearn.multiclass import OneVsRestClassifier + +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_allclose +from sklearn.utils._mocking import CheckingClassifier, MockDataFrame + +from sklearn.utils.validation import _num_samples + +from sklearn.model_selection import cross_val_score, ShuffleSplit +from sklearn.model_selection import cross_val_predict +from sklearn.model_selection import cross_validate +from sklearn.model_selection import permutation_test_score +from sklearn.model_selection import KFold +from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import LeaveOneOut +from sklearn.model_selection import LeaveOneGroupOut +from sklearn.model_selection import LeavePGroupsOut +from sklearn.model_selection import GroupKFold +from sklearn.model_selection import GroupShuffleSplit +from sklearn.model_selection import learning_curve +from sklearn.model_selection import validation_curve +from sklearn.model_selection._validation import _check_is_permutation +from sklearn.model_selection._validation import _fit_and_score +from sklearn.model_selection._validation import _score + +from sklearn.datasets import make_regression +from sklearn.datasets import load_diabetes +from sklearn.datasets import load_iris +from sklearn.datasets import load_digits +from sklearn.metrics import explained_variance_score +from sklearn.metrics import make_scorer +from sklearn.metrics import accuracy_score +from sklearn.metrics import confusion_matrix +from sklearn.metrics import precision_recall_fscore_support +from sklearn.metrics import precision_score +from sklearn.metrics import r2_score +from sklearn.metrics import mean_squared_error +from sklearn.metrics import check_scoring + +from sklearn.linear_model import Ridge, LogisticRegression, SGDClassifier +from sklearn.linear_model import PassiveAggressiveClassifier, RidgeClassifier +from sklearn.ensemble import RandomForestClassifier from sklearn.neighbors import KNeighborsClassifier +from sklearn.svm import SVC, LinearSVC +from sklearn.cluster import KMeans from sklearn.neural_network import MLPRegressor -from sklearn.pipeline import Pipeline + +from sklearn.impute import SimpleImputer + from sklearn.preprocessing import LabelEncoder -from sklearn.svm import SVC, LinearSVC +from sklearn.pipeline import Pipeline + +from io import StringIO +from sklearn.base import BaseEstimator +from sklearn.base import clone +from sklearn.multiclass import OneVsRestClassifier from sklearn.utils import shuffle -from sklearn.utils._mocking import CheckingClassifier, MockDataFrame -from sklearn.utils._testing import ( - assert_allclose, - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, -) -from sklearn.utils.validation import _num_samples +from sklearn.datasets import make_classification +from sklearn.datasets import make_multilabel_classification + +from sklearn.model_selection.tests.common import OneTimeSplitter +from sklearn.model_selection import GridSearchCV + try: 
WindowsError @@ -605,7 +600,7 @@ def test_cross_val_score_pandas(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import DataFrame, Series + from pandas import Series, DataFrame types.append((Series, DataFrame)) except ImportError: @@ -1106,7 +1101,7 @@ def test_cross_val_predict_pandas(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import DataFrame, Series + from pandas import Series, DataFrame types.append((Series, DataFrame)) except ImportError: @@ -2063,7 +2058,7 @@ def test_permutation_test_score_pandas(): # check permutation_test_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import DataFrame, Series + from pandas import Series, DataFrame types.append((Series, DataFrame)) except ImportError: diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index f71cc0cdaf1ce..b46b4bfb8b5ef 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -34,35 +34,29 @@ # License: BSD 3 clause import array -import itertools -import warnings - import numpy as np +import warnings import scipy.sparse as sp +import itertools -from joblib import Parallel - -from .base import ( - BaseEstimator, - ClassifierMixin, - MetaEstimatorMixin, - MultiOutputMixin, - clone, - is_classifier, - is_regressor, -) -from .metrics.pairwise import euclidean_distances +from .base import BaseEstimator, ClassifierMixin, clone, is_classifier +from .base import MultiOutputMixin +from .base import MetaEstimatorMixin, is_regressor from .preprocessing import LabelBinarizer +from .metrics.pairwise import euclidean_distances from .utils import check_random_state from .utils._tags import _safe_tags -from .utils.fixes import delayed -from .utils.metaestimators import _safe_split, available_if +from .utils.validation import _num_samples +from .utils.validation import check_is_fitted from .utils.multiclass import ( _check_partial_fit_first_call, - _ovr_decision_function, check_classification_targets, + _ovr_decision_function, ) -from .utils.validation import _num_samples, check_is_fitted +from .utils.metaestimators import _safe_split, available_if +from .utils.fixes import delayed + +from joblib import Parallel __all__ = [ "OneVsRestClassifier", diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index a7fbb6b3eec94..24e4cc8dda7e8 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -14,27 +14,19 @@ # # License: BSD 3 clause -from abc import ABCMeta, abstractmethod - import numpy as np import scipy.sparse as sp - from joblib import Parallel -from .base import ( - BaseEstimator, - ClassifierMixin, - MetaEstimatorMixin, - RegressorMixin, - clone, - is_classifier, -) +from abc import ABCMeta, abstractmethod +from .base import BaseEstimator, clone, MetaEstimatorMixin +from .base import RegressorMixin, ClassifierMixin, is_classifier from .model_selection import cross_val_predict -from .utils import check_random_state -from .utils.fixes import delayed from .utils.metaestimators import available_if +from .utils import check_random_state +from .utils.validation import check_is_fitted, has_fit_parameter, _check_fit_params from .utils.multiclass import check_classification_targets -from .utils.validation import _check_fit_params, check_is_fitted, has_fit_parameter +from .utils.fixes import delayed __all__ = [ "MultiOutputRegressor", diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 
33a150420eaaa..ca7be2d3799a3 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -14,17 +14,23 @@ # # License: BSD 3 clause import warnings + from abc import ABCMeta, abstractmethod + import numpy as np from scipy.special import logsumexp from .base import BaseEstimator, ClassifierMixin -from .preprocessing import LabelBinarizer, binarize, label_binarize +from .preprocessing import binarize +from .preprocessing import LabelBinarizer +from .preprocessing import label_binarize from .utils import deprecated from .utils.extmath import safe_sparse_dot from .utils.multiclass import _check_partial_fit_first_call -from .utils.validation import _check_sample_weight, check_is_fitted, check_non_negative +from .utils.validation import check_is_fitted, check_non_negative +from .utils.validation import _check_sample_weight + __all__ = [ "BernoulliNB", diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index a01d460b8885d..12824e9cb684e 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -4,22 +4,19 @@ """ from ._ball_tree import BallTree -from ._base import VALID_METRICS, VALID_METRICS_SPARSE, sort_graph_by_row_values -from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier -from ._distance_metric import DistanceMetric -from ._graph import ( - KNeighborsTransformer, - RadiusNeighborsTransformer, - kneighbors_graph, - radius_neighbors_graph, -) from ._kd_tree import KDTree +from ._distance_metric import DistanceMetric +from ._graph import kneighbors_graph, radius_neighbors_graph +from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer +from ._unsupervised import NearestNeighbors +from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier +from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor +from ._nearest_centroid import NearestCentroid from ._kde import KernelDensity from ._lof import LocalOutlierFactor from ._nca import NeighborhoodComponentsAnalysis -from ._nearest_centroid import NearestCentroid -from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor -from ._unsupervised import NearestNeighbors +from ._base import sort_graph_by_row_values +from ._base import VALID_METRICS, VALID_METRICS_SPARSE __all__ = [ "BallTree", diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index 7ad28fa5dd358..8ecbaa351647c 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -6,30 +6,37 @@ # Multi-output support by Arnaud Joly # # License: BSD 3 clause (C) INRIA, University of Amsterdam -import numbers +from functools import partial + import warnings from abc import ABCMeta, abstractmethod -from functools import partial +import numbers import numpy as np from scipy.sparse import csr_matrix, issparse - from joblib import Parallel, effective_n_jobs -from ..base import BaseEstimator, MultiOutputMixin, is_classifier -from ..exceptions import DataConversionWarning, EfficiencyWarning +from ._ball_tree import BallTree +from ._kd_tree import KDTree +from ..base import BaseEstimator, MultiOutputMixin +from ..base import is_classifier from ..metrics import pairwise_distances_chunked +from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..metrics._pairwise_distances_reduction import ( PairwiseDistancesArgKmin, PairwiseDistancesRadiusNeighborhood, ) -from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS -from ..utils import _to_object_array, check_array, gen_even_slices -from ..utils.fixes import delayed, parse_version, 
sp_version +from ..utils import ( + check_array, + gen_even_slices, + _to_object_array, +) from ..utils.multiclass import check_classification_targets -from ..utils.validation import check_is_fitted, check_non_negative -from ._ball_tree import BallTree -from ._kd_tree import KDTree +from ..utils.validation import check_is_fitted +from ..utils.validation import check_non_negative +from ..utils.fixes import delayed, sp_version +from ..utils.fixes import parse_version +from ..exceptions import DataConversionWarning, EfficiencyWarning VALID_METRICS = dict( ball_tree=BallTree.valid_metrics, diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi index 06fdb51cf42e9..36781a770906c 100644 --- a/sklearn/neighbors/_binary_tree.pxi +++ b/sklearn/neighbors/_binary_tree.pxi @@ -143,32 +143,29 @@ # """Compute the maximum distance between two nodes""" cimport numpy as np -from libc.math cimport cos, exp, fabs, fmax, fmin, lgamma, log, pow, sqrt -from libc.stdlib cimport calloc, free, malloc +from libc.math cimport fabs, sqrt, exp, cos, pow, log, lgamma +from libc.math cimport fmin, fmax +from libc.stdlib cimport calloc, malloc, free from libc.string cimport memcpy -import warnings - import numpy as np +import warnings from ..metrics._dist_metrics cimport ( DistanceMetric, euclidean_dist, - euclidean_dist_to_rdist, euclidean_rdist, + euclidean_dist_to_rdist, ) + from ._partition_nodes cimport partition_node_indices from ..utils import check_array - from ..utils._typedefs cimport DTYPE_t, ITYPE_t - from ..utils._typedefs import DTYPE, ITYPE - from ..utils._heap cimport heap_push from ..utils._sorting cimport simultaneous_sort as _simultaneous_sort - cdef extern from "numpy/arrayobject.h": void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) @@ -762,9 +759,7 @@ def newObj(obj): ###################################################################### # define the reverse mapping of VALID_METRICS - from sklearn.metrics._dist_metrics import get_valid_metric_ids - VALID_METRIC_IDS = get_valid_metric_ids(VALID_METRICS) diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index 35248b3b3846a..bcad8c71aee07 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -8,21 +8,15 @@ # # License: BSD 3 clause (C) INRIA, University of Amsterdam -import warnings - import numpy as np from scipy import stats - -from ..base import ClassifierMixin from ..utils.extmath import weighted_mode from ..utils.validation import _is_arraylike, _num_samples -from ._base import ( - KNeighborsMixin, - NeighborsBase, - RadiusNeighborsMixin, - _check_weights, - _get_weights, -) + +import warnings +from ._base import _check_weights, _get_weights +from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin +from ..base import ClassifierMixin class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase): diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index 09df0073009b9..2be70c0638517 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -4,10 +4,11 @@ # Tom Dupre la Tour # # License: BSD 3 clause (C) INRIA, University of Amsterdam +from ._base import KNeighborsMixin, RadiusNeighborsMixin +from ._base import NeighborsBase +from ._unsupervised import NearestNeighbors from ..base import TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ..utils.validation import check_is_fitted -from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin -from 
._unsupervised import NearestNeighbors def _check_params(X, metric, p, metric_params): diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index 41f1849c0bf3b..b707674993755 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -5,17 +5,17 @@ # Author: Jake Vanderplas import numbers - import numpy as np from scipy.special import gammainc - from ..base import BaseEstimator from ..utils import check_random_state, check_scalar -from ..utils.extmath import row_norms from ..utils.validation import _check_sample_weight, check_is_fitted -from ._ball_tree import DTYPE, BallTree + +from ..utils.extmath import row_norms +from ._ball_tree import BallTree, DTYPE from ._kd_tree import KDTree + VALID_KERNELS = [ "gaussian", "tophat", diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py index 7d7ced17378b0..025a1c6d80768 100644 --- a/sklearn/neighbors/_lof.py +++ b/sklearn/neighbors/_lof.py @@ -2,15 +2,16 @@ # Alexandre Gramfort # License: BSD 3 clause -import warnings - import numpy as np +import warnings +from ._base import NeighborsBase +from ._base import KNeighborsMixin from ..base import OutlierMixin -from ..utils import check_array + from ..utils.metaestimators import available_if from ..utils.validation import check_is_fitted -from ._base import KNeighborsMixin, NeighborsBase +from ..utils import check_array __all__ = ["LocalOutlierFactor"] diff --git a/sklearn/neighbors/_nca.py b/sklearn/neighbors/_nca.py index 2d5696c31d991..af76a000ef2cb 100644 --- a/sklearn/neighbors/_nca.py +++ b/sklearn/neighbors/_nca.py @@ -6,23 +6,21 @@ # John Chiotellis # License: BSD 3 clause -import numbers -import sys -import time from warnings import warn - import numpy as np +import sys +import time +import numbers from scipy.optimize import minimize - -from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin -from ..decomposition import PCA -from ..exceptions import ConvergenceWarning +from ..utils.extmath import softmax from ..metrics import pairwise_distances +from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin from ..preprocessing import LabelEncoder -from ..utils.extmath import softmax +from ..decomposition import PCA from ..utils.multiclass import check_classification_targets from ..utils.random import check_random_state -from ..utils.validation import check_array, check_is_fitted, check_scalar +from ..utils.validation import check_is_fitted, check_array, check_scalar +from ..exceptions import ConvergenceWarning class NeighborhoodComponentsAnalysis( diff --git a/sklearn/neighbors/_nearest_centroid.py b/sklearn/neighbors/_nearest_centroid.py index f90f04813a6b1..b52d9407333a6 100644 --- a/sklearn/neighbors/_nearest_centroid.py +++ b/sklearn/neighbors/_nearest_centroid.py @@ -8,16 +8,15 @@ # License: BSD 3 clause import warnings - import numpy as np from scipy import sparse as sp from ..base import BaseEstimator, ClassifierMixin from ..metrics.pairwise import pairwise_distances from ..preprocessing import LabelEncoder -from ..utils.multiclass import check_classification_targets -from ..utils.sparsefuncs import csc_median_axis_0 from ..utils.validation import check_is_fitted +from ..utils.sparsefuncs import csc_median_axis_0 +from ..utils.multiclass import check_classification_targets class NearestCentroid(ClassifierMixin, BaseEstimator): diff --git a/sklearn/neighbors/_partition_nodes.pxd b/sklearn/neighbors/_partition_nodes.pxd index d53989198c680..94b02002d7a1e 100644 --- 
a/sklearn/neighbors/_partition_nodes.pxd +++ b/sklearn/neighbors/_partition_nodes.pxd @@ -1,6 +1,5 @@ from ..utils._typedefs cimport DTYPE_t, ITYPE_t - cdef int partition_node_indices( DTYPE_t *data, ITYPE_t *node_indices, diff --git a/sklearn/neighbors/_quad_tree.pyx b/sklearn/neighbors/_quad_tree.pyx index be68dc77cc21b..6af7d1f547303 100644 --- a/sklearn/neighbors/_quad_tree.pyx +++ b/sklearn/neighbors/_quad_tree.pyx @@ -3,19 +3,17 @@ from cpython cimport Py_INCREF, PyObject, PyTypeObject -from libc.stdint cimport SIZE_MAX -from libc.stdio cimport printf -from libc.stdlib cimport free, malloc + +from libc.stdlib cimport malloc, free from libc.string cimport memcpy +from libc.stdio cimport printf +from libc.stdint cimport SIZE_MAX from ..tree._utils cimport safe_realloc, sizet_ptr_to_ndarray - -import numpy as np - from ..utils import check_array +import numpy as np cimport numpy as np - np.import_array() cdef extern from "math.h": diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py index fb837ae99c38e..4c995e5062277 100644 --- a/sklearn/neighbors/_regression.py +++ b/sklearn/neighbors/_regression.py @@ -14,14 +14,9 @@ import numpy as np +from ._base import _get_weights, _check_weights +from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin from ..base import RegressorMixin -from ._base import ( - KNeighborsMixin, - NeighborsBase, - RadiusNeighborsMixin, - _check_weights, - _get_weights, -) class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase): diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index a57ba9454cb1c..6399363112378 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -1,5 +1,7 @@ """Unsupervised nearest neighbors learner""" -from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin +from ._base import NeighborsBase +from ._base import KNeighborsMixin +from ._base import RadiusNeighborsMixin class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin, NeighborsBase): diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index b232a02ebeb3d..d5046afd2da2a 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -3,11 +3,10 @@ import numpy as np import pytest from numpy.testing import assert_array_almost_equal - from sklearn.neighbors._ball_tree import BallTree from sklearn.utils import check_random_state -from sklearn.utils._testing import _convert_container from sklearn.utils.validation import check_array +from sklearn.utils._testing import _convert_container rng = np.random.RandomState(10) V_mahalanobis = rng.rand(3, 3) diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py index 7c368982edbd0..d8d9437636d1d 100644 --- a/sklearn/neighbors/tests/test_kd_tree.py +++ b/sklearn/neighbors/tests/test_kd_tree.py @@ -1,10 +1,10 @@ import numpy as np import pytest - from joblib import Parallel -from sklearn.neighbors._kd_tree import KDTree from sklearn.utils.fixes import delayed +from sklearn.neighbors._kd_tree import KDTree + DIMENSION = 3 METRICS = {"euclidean": {}, "manhattan": {}, "chebyshev": {}, "minkowski": dict(p=3)} diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py index 74d1d813d4086..8cbc613140720 100644 --- a/sklearn/neighbors/tests/test_kde.py +++ b/sklearn/neighbors/tests/test_kde.py @@ -1,15 +1,16 @@ import numpy as np + import pytest -import joblib 
-from sklearn.datasets import make_blobs -from sklearn.exceptions import NotFittedError -from sklearn.model_selection import GridSearchCV -from sklearn.neighbors import KDTree, KernelDensity, NearestNeighbors +from sklearn.utils._testing import assert_allclose +from sklearn.neighbors import KernelDensity, KDTree, NearestNeighbors from sklearn.neighbors._ball_tree import kernel_norm from sklearn.pipeline import make_pipeline +from sklearn.datasets import make_blobs +from sklearn.model_selection import GridSearchCV from sklearn.preprocessing import StandardScaler -from sklearn.utils._testing import assert_allclose +from sklearn.exceptions import NotFittedError +import joblib # XXX Duplicated in test_neighbors_tree, test_kde diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py index 3c1698c56a075..e4b79c8f06668 100644 --- a/sklearn/neighbors/tests/test_lof.py +++ b/sklearn/neighbors/tests/test_lof.py @@ -2,22 +2,24 @@ # Alexandre Gramfort # License: BSD 3 clause -import re from math import sqrt import numpy as np +from sklearn import neighbors +import re import pytest from numpy.testing import assert_array_equal -from sklearn import metrics, neighbors -from sklearn.datasets import load_iris +from sklearn import metrics from sklearn.metrics import roc_auc_score + from sklearn.utils import check_random_state from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils.estimator_checks import ( - check_outlier_corruption, - parametrize_with_checks, -) +from sklearn.utils.estimator_checks import check_outlier_corruption +from sklearn.utils.estimator_checks import parametrize_with_checks + +from sklearn.datasets import load_iris + # load the iris dataset # and randomly permute it diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 2504c77486441..f6fdb343ccdf2 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -6,19 +6,18 @@ # John Chiotellis # License: BSD 3 clause +import pytest import re - import numpy as np -import pytest -from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.testing import assert_array_equal, assert_array_almost_equal from scipy.optimize import check_grad - from sklearn import clone -from sklearn.datasets import load_iris, make_blobs, make_classification from sklearn.exceptions import ConvergenceWarning -from sklearn.metrics import pairwise_distances -from sklearn.neighbors import NeighborhoodComponentsAnalysis from sklearn.utils import check_random_state +from sklearn.datasets import load_iris, make_classification, make_blobs +from sklearn.neighbors import NeighborhoodComponentsAnalysis +from sklearn.metrics import pairwise_distances + rng = check_random_state(0) # load and shuffle iris dataset diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index a0b0189c992a3..c762b8390ed63 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -3,11 +3,11 @@ """ import numpy as np import pytest -from numpy.testing import assert_array_equal from scipy import sparse as sp +from numpy.testing import assert_array_equal -from sklearn import datasets from sklearn.neighbors import NearestCentroid +from sklearn import datasets # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 
6bdfea276f335..337e777191475 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1,42 +1,58 @@ -import re -import warnings -from contextlib import nullcontext from itertools import product +from contextlib import nullcontext +import warnings -import numpy as np import pytest +import re +import numpy as np from scipy.sparse import ( bsr_matrix, coo_matrix, csc_matrix, csr_matrix, - dia_matrix, dok_matrix, - issparse, + dia_matrix, lil_matrix, + issparse, ) -import joblib -from sklearn import config_context, datasets, metrics, neighbors +from sklearn import ( + config_context, + datasets, + metrics, + neighbors, +) from sklearn.base import clone -from sklearn.exceptions import DataConversionWarning, EfficiencyWarning, NotFittedError +from sklearn.exceptions import DataConversionWarning +from sklearn.exceptions import EfficiencyWarning +from sklearn.exceptions import NotFittedError from sklearn.metrics.pairwise import pairwise_distances from sklearn.metrics.tests.test_dist_metrics import BOOL_METRICS from sklearn.metrics.tests.test_pairwise_distances_reduction import ( assert_radius_neighborhood_results_equality, ) -from sklearn.model_selection import cross_val_score, train_test_split -from sklearn.neighbors import VALID_METRICS_SPARSE, KNeighborsRegressor +from sklearn.model_selection import cross_val_score +from sklearn.model_selection import train_test_split +from sklearn.neighbors import ( + VALID_METRICS_SPARSE, + KNeighborsRegressor, +) from sklearn.neighbors._base import ( - KNeighborsMixin, - _check_precomputed, _is_sorted_by_data, + _check_precomputed, sort_graph_by_row_values, + KNeighborsMixin, ) from sklearn.pipeline import make_pipeline -from sklearn.utils._testing import assert_allclose, assert_array_equal, ignore_warnings -from sklearn.utils.fixes import parse_version, sp_version +from sklearn.utils._testing import ( + assert_allclose, + assert_array_equal, +) +from sklearn.utils._testing import ignore_warnings from sklearn.utils.validation import check_random_state +from sklearn.utils.fixes import sp_version, parse_version + +import joblib rng = np.random.RandomState(0) # load and shuffle iris dataset @@ -2120,8 +2136,8 @@ def test_auto_algorithm(X, metric, metric_params, expected_algo): # TODO: Remove in 1.3 def test_neighbors_distance_metric_deprecation(): - from sklearn.metrics import DistanceMetric as ActualDistanceMetric from sklearn.neighbors import DistanceMetric + from sklearn.metrics import DistanceMetric as ActualDistanceMetric msg = r"This import path will be removed in 1\.3" with pytest.warns(FutureWarning, match=msg): diff --git a/sklearn/neighbors/tests/test_neighbors_pipeline.py b/sklearn/neighbors/tests/test_neighbors_pipeline.py index dc4d59fbc2813..df56e64f5770b 100644 --- a/sklearn/neighbors/tests/test_neighbors_pipeline.py +++ b/sklearn/neighbors/tests/test_neighbors_pipeline.py @@ -8,20 +8,23 @@ import numpy as np import pytest -from sklearn.base import clone -from sklearn.cluster import DBSCAN, SpectralClustering +from sklearn.utils._testing import assert_array_almost_equal from sklearn.cluster.tests.common import generate_clustered_data from sklearn.datasets import make_blobs -from sklearn.manifold import TSNE, Isomap, SpectralEmbedding -from sklearn.neighbors import ( - KNeighborsRegressor, - KNeighborsTransformer, - LocalOutlierFactor, - RadiusNeighborsRegressor, - RadiusNeighborsTransformer, -) from sklearn.pipeline import make_pipeline -from sklearn.utils._testing import assert_array_almost_equal 
+from sklearn.base import clone + +from sklearn.neighbors import KNeighborsTransformer +from sklearn.neighbors import RadiusNeighborsTransformer + +from sklearn.cluster import DBSCAN +from sklearn.cluster import SpectralClustering +from sklearn.neighbors import KNeighborsRegressor +from sklearn.neighbors import RadiusNeighborsRegressor +from sklearn.neighbors import LocalOutlierFactor +from sklearn.manifold import SpectralEmbedding +from sklearn.manifold import Isomap +from sklearn.manifold import TSNE def test_spectral_clustering(): diff --git a/sklearn/neighbors/tests/test_neighbors_tree.py b/sklearn/neighbors/tests/test_neighbors_tree.py index 5a05fb1a51e7b..85d578c271faa 100644 --- a/sklearn/neighbors/tests/test_neighbors_tree.py +++ b/sklearn/neighbors/tests/test_neighbors_tree.py @@ -1,23 +1,30 @@ # License: BSD 3 clause -import itertools import pickle +import itertools import numpy as np import pytest -from numpy.testing import assert_allclose, assert_array_almost_equal from sklearn.metrics import DistanceMetric -from sklearn.neighbors._ball_tree import DTYPE, ITYPE, BallTree -from sklearn.neighbors._ball_tree import NeighborsHeap as NeighborsHeapBT -from sklearn.neighbors._ball_tree import kernel_norm -from sklearn.neighbors._ball_tree import nodeheap_sort as nodeheap_sort_bt -from sklearn.neighbors._ball_tree import simultaneous_sort as simultaneous_sort_bt -from sklearn.neighbors._kd_tree import KDTree -from sklearn.neighbors._kd_tree import NeighborsHeap as NeighborsHeapKDT -from sklearn.neighbors._kd_tree import nodeheap_sort as nodeheap_sort_kdt -from sklearn.neighbors._kd_tree import simultaneous_sort as simultaneous_sort_kdt +from sklearn.neighbors._ball_tree import ( + BallTree, + kernel_norm, + DTYPE, + ITYPE, + NeighborsHeap as NeighborsHeapBT, + simultaneous_sort as simultaneous_sort_bt, + nodeheap_sort as nodeheap_sort_bt, +) +from sklearn.neighbors._kd_tree import ( + KDTree, + NeighborsHeap as NeighborsHeapKDT, + simultaneous_sort as simultaneous_sort_kdt, + nodeheap_sort as nodeheap_sort_kdt, +) + from sklearn.utils import check_random_state +from numpy.testing import assert_array_almost_equal, assert_allclose rng = np.random.RandomState(42) V_mahalanobis = rng.rand(3, 3) diff --git a/sklearn/neighbors/tests/test_quad_tree.py b/sklearn/neighbors/tests/test_quad_tree.py index be9a4c5fe549d..bba79e2c8ee1a 100644 --- a/sklearn/neighbors/tests/test_quad_tree.py +++ b/sklearn/neighbors/tests/test_quad_tree.py @@ -1,6 +1,6 @@ import pickle - import numpy as np + import pytest from sklearn.neighbors._quad_tree import _QuadTree diff --git a/sklearn/neural_network/__init__.py b/sklearn/neural_network/__init__.py index 0b321b605de0b..7f6bad7bbd7e7 100644 --- a/sklearn/neural_network/__init__.py +++ b/sklearn/neural_network/__init__.py @@ -5,7 +5,9 @@ # License: BSD 3 clause -from ._multilayer_perceptron import MLPClassifier, MLPRegressor from ._rbm import BernoulliRBM +from ._multilayer_perceptron import MLPClassifier +from ._multilayer_perceptron import MLPRegressor + __all__ = ["BernoulliRBM", "MLPClassifier", "MLPRegressor"] diff --git a/sklearn/neural_network/_base.py b/sklearn/neural_network/_base.py index 73d62f9543e98..0e40739556e18 100644 --- a/sklearn/neural_network/_base.py +++ b/sklearn/neural_network/_base.py @@ -5,6 +5,7 @@ # License: BSD 3 clause import numpy as np + from scipy.special import expit as logistic_sigmoid from scipy.special import xlogy diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py 
index 0fb3b821503ab..5fb4d7c64ffee 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -6,35 +6,36 @@
 # Jiyuan Qian
 # License: BSD 3 clause

-import warnings
+import numpy as np
+
 from abc import ABCMeta, abstractmethod
+import warnings
 from itertools import chain

-import numpy as np
 import scipy.optimize

-from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, is_classifier
-from ..exceptions import ConvergenceWarning
+from ..base import (
+    BaseEstimator,
+    ClassifierMixin,
+    RegressorMixin,
+)
+from ..base import is_classifier
+from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS
+from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer
 from ..model_selection import train_test_split
 from ..preprocessing import LabelBinarizer
-from ..utils import (
-    _safe_indexing,
-    check_random_state,
-    column_or_1d,
-    gen_batches,
-    shuffle,
-)
+from ..utils import gen_batches, check_random_state
+from ..utils import shuffle
+from ..utils import _safe_indexing
+from ..utils import column_or_1d
+from ..exceptions import ConvergenceWarning
 from ..utils.extmath import safe_sparse_dot
-from ..utils.metaestimators import available_if
-from ..utils.multiclass import (
-    _check_partial_fit_first_call,
-    type_of_target,
-    unique_labels,
-)
-from ..utils.optimize import _check_optimize_result
 from ..utils.validation import check_is_fitted
-from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS
-from ._stochastic_optimizers import AdamOptimizer, SGDOptimizer
+from ..utils.multiclass import _check_partial_fit_first_call, unique_labels
+from ..utils.multiclass import type_of_target
+from ..utils.optimize import _check_optimize_result
+from ..utils.metaestimators import available_if
+

 _STOCHASTIC_SOLVERS = ["sgd", "adam"]

diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py
index 24f142f47c418..aac92c3108787 100644
--- a/sklearn/neural_network/_rbm.py
+++ b/sklearn/neural_network/_rbm.py
@@ -13,9 +13,13 @@
 import scipy.sparse as sp
 from scipy.special import expit # logistic function

-from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
-from ..utils import check_random_state, gen_even_slices
-from ..utils.extmath import log_logistic, safe_sparse_dot
+from ..base import BaseEstimator
+from ..base import TransformerMixin
+from ..base import _ClassNamePrefixFeaturesOutMixin
+from ..utils import check_random_state
+from ..utils import gen_even_slices
+from ..utils.extmath import safe_sparse_dot
+from ..utils.extmath import log_logistic
 from ..utils.validation import check_is_fitted

diff --git a/sklearn/neural_network/tests/test_base.py b/sklearn/neural_network/tests/test_base.py
index af7b38e899907..32aa7f1fee917 100644
--- a/sklearn/neural_network/tests/test_base.py
+++ b/sklearn/neural_network/tests/test_base.py
@@ -1,7 +1,8 @@
-import numpy as np
 import pytest
+import numpy as np

-from sklearn.neural_network._base import binary_log_loss, log_loss
+from sklearn.neural_network._base import binary_log_loss
+from sklearn.neural_network._base import log_loss


 def test_binary_log_loss_1_prob_finite():
diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py
index 78f285f6020ea..999983d751cc1 100644
--- a/sklearn/neural_network/tests/test_mlp.py
+++ b/sklearn/neural_network/tests/test_mlp.py
@@ -5,29 +5,33 @@
 # Author: Issam H. Laradji
 # License: BSD 3 clause

-import re
+import pytest
 import sys
 import warnings
-from io import StringIO
+import re

 import numpy as np
-import pytest
-from numpy.testing import assert_allclose, assert_almost_equal, assert_array_equal
-from scipy.sparse import csr_matrix
-
 import joblib
-from sklearn.datasets import (
-    load_digits,
-    load_iris,
-    make_multilabel_classification,
-    make_regression,
+
+from numpy.testing import (
+    assert_almost_equal,
+    assert_array_equal,
+    assert_allclose,
 )
+
+from sklearn.datasets import load_digits, load_iris
+from sklearn.datasets import make_regression, make_multilabel_classification
 from sklearn.exceptions import ConvergenceWarning
+from io import StringIO
 from sklearn.metrics import roc_auc_score
-from sklearn.neural_network import MLPClassifier, MLPRegressor
-from sklearn.preprocessing import LabelBinarizer, MinMaxScaler, scale
+from sklearn.neural_network import MLPClassifier
+from sklearn.neural_network import MLPRegressor
+from sklearn.preprocessing import LabelBinarizer
+from sklearn.preprocessing import MinMaxScaler, scale
+from scipy.sparse import csr_matrix
 from sklearn.utils._testing import ignore_warnings

+
 ACTIVATION_TYPES = ["identity", "logistic", "tanh", "relu"]

 X_digits, y_digits = load_digits(n_class=3, return_X_y=True)
diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py
index e46723628ac2c..d36fa6b0bd11f 100644
--- a/sklearn/neural_network/tests/test_rbm.py
+++ b/sklearn/neural_network/tests/test_rbm.py
@@ -1,18 +1,18 @@
-import re
 import sys
-from io import StringIO
+import re
+import pytest

 import numpy as np
-import pytest
 from scipy.sparse import csc_matrix, csr_matrix, lil_matrix
-
-from sklearn.datasets import load_digits
-from sklearn.neural_network import BernoulliRBM
 from sklearn.utils._testing import (
-    assert_allclose,
     assert_almost_equal,
     assert_array_equal,
+    assert_allclose,
 )
+
+from sklearn.datasets import load_digits
+from io import StringIO
+from sklearn.neural_network import BernoulliRBM
 from sklearn.utils.validation import assert_all_finite

 Xdigits, _ = load_digits(return_X_y=True)
diff --git a/sklearn/neural_network/tests/test_stochastic_optimizers.py b/sklearn/neural_network/tests/test_stochastic_optimizers.py
index 58a9f0c7dda13..e876892f28daf 100644
--- a/sklearn/neural_network/tests/test_stochastic_optimizers.py
+++ b/sklearn/neural_network/tests/test_stochastic_optimizers.py
@@ -1,12 +1,13 @@
 import numpy as np

 from sklearn.neural_network._stochastic_optimizers import (
-    AdamOptimizer,
     BaseOptimizer,
     SGDOptimizer,
+    AdamOptimizer,
 )
 from sklearn.utils._testing import assert_array_equal

+
 shapes = [(4, 6), (6, 8), (7, 8, 9)]
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 499c46be72fa9..74347f250bc83 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -14,19 +14,24 @@

 import numpy as np
 from scipy import sparse
-
 from joblib import Parallel

-from .base import TransformerMixin, clone
-from .exceptions import NotFittedError
+from .base import clone, TransformerMixin
 from .preprocessing import FunctionTransformer
-from .utils import Bunch, _print_elapsed_time
 from .utils._estimator_html_repr import _VisualBlock
-from .utils._tags import _safe_tags
+from .utils.metaestimators import available_if
+from .utils import (
+    Bunch,
+    _print_elapsed_time,
+)
 from .utils.deprecation import deprecated
+from .utils._tags import _safe_tags
+from .utils.validation import check_memory
+from .utils.validation import check_is_fitted
 from .utils.fixes import delayed
-from .utils.metaestimators import _BaseComposition, available_if
-from .utils.validation import check_is_fitted, check_memory
+from .exceptions import NotFittedError
+
+from .utils.metaestimators import _BaseComposition

 __all__ = ["Pipeline", "FeatureUnion", "make_pipeline", "make_union"]

diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py
index faafcc6b9f08a..ccea91545a467 100644
--- a/sklearn/preprocessing/__init__.py
+++ b/sklearn/preprocessing/__init__.py
@@ -3,31 +3,40 @@
 normalization, binarization methods.
 """

-from ._data import (
-    Binarizer,
-    KernelCenterer,
-    MaxAbsScaler,
-    MinMaxScaler,
-    Normalizer,
-    PowerTransformer,
-    QuantileTransformer,
-    RobustScaler,
-    StandardScaler,
-    add_dummy_feature,
-    binarize,
-    maxabs_scale,
-    minmax_scale,
-    normalize,
-    power_transform,
-    quantile_transform,
-    robust_scale,
-    scale,
-)
-from ._discretization import KBinsDiscretizer
-from ._encoders import OneHotEncoder, OrdinalEncoder
 from ._function_transformer import FunctionTransformer
-from ._label import LabelBinarizer, LabelEncoder, MultiLabelBinarizer, label_binarize
-from ._polynomial import PolynomialFeatures, SplineTransformer
+
+from ._data import Binarizer
+from ._data import KernelCenterer
+from ._data import MinMaxScaler
+from ._data import MaxAbsScaler
+from ._data import Normalizer
+from ._data import RobustScaler
+from ._data import StandardScaler
+from ._data import QuantileTransformer
+from ._data import add_dummy_feature
+from ._data import binarize
+from ._data import normalize
+from ._data import scale
+from ._data import robust_scale
+from ._data import maxabs_scale
+from ._data import minmax_scale
+from ._data import quantile_transform
+from ._data import power_transform
+from ._data import PowerTransformer
+
+from ._encoders import OneHotEncoder
+from ._encoders import OrdinalEncoder
+
+from ._label import label_binarize
+from ._label import LabelBinarizer
+from ._label import LabelEncoder
+from ._label import MultiLabelBinarizer
+
+from ._discretization import KBinsDiscretizer
+
+from ._polynomial import PolynomialFeatures
+from ._polynomial import SplineTransformer
+

 __all__ = [
     "Binarizer",
diff --git a/sklearn/preprocessing/_csr_polynomial_expansion.pyx b/sklearn/preprocessing/_csr_polynomial_expansion.pyx
index 9bf55452a34a5..ef958b12266e1 100644
--- a/sklearn/preprocessing/_csr_polynomial_expansion.pyx
+++ b/sklearn/preprocessing/_csr_polynomial_expansion.pyx
@@ -1,9 +1,8 @@
 # Author: Andrew nystrom

 from scipy.sparse import csr_matrix
-
-cimport numpy as np
 from numpy cimport ndarray
+cimport numpy as np

 np.import_array()
 ctypedef np.int32_t INDEX_T
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 6f4607544a2cb..f0088aab521ad 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -11,35 +11,39 @@
 import warnings

 import numpy as np
-from scipy import optimize, sparse, stats
+from scipy import sparse
+from scipy import stats
+from scipy import optimize
 from scipy.special import boxcox

 from ..base import (
     BaseEstimator,
     TransformerMixin,
-    _ClassNamePrefixFeaturesOutMixin,
     _OneToOneFeatureMixin,
+    _ClassNamePrefixFeaturesOutMixin,
 )
 from ..utils import check_array
 from ..utils.extmath import _incremental_mean_and_var, row_norms
+from ..utils.sparsefuncs_fast import (
+    inplace_csr_row_normalize_l1,
+    inplace_csr_row_normalize_l2,
+)
 from ..utils.sparsefuncs import (
-    incr_mean_variance_axis,
     inplace_column_scale,
     mean_variance_axis,
+    incr_mean_variance_axis,
     min_max_axis,
 )
-from ..utils.sparsefuncs_fast import (
-    inplace_csr_row_normalize_l1,
-    inplace_csr_row_normalize_l2,
-)
 from ..utils.validation import (
-    FLOAT_DTYPES,
-    _check_sample_weight,
     check_is_fitted,
     check_random_state,
+    _check_sample_weight,
+    FLOAT_DTYPES,
 )
+
 from ._encoders import OneHotEncoder
+

 BOUNDS_THRESHOLD = 1e-7

 __all__ = [
diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py
index 50d6ebbe948c5..478051c52c630 100644
--- a/sklearn/preprocessing/_discretization.py
+++ b/sklearn/preprocessing/_discretization.py
@@ -5,20 +5,18 @@

 import numbers
+import numpy as np
 import warnings

-import numpy as np
+from . import OneHotEncoder

 from ..base import BaseEstimator, TransformerMixin
+from ..utils.validation import check_array
+from ..utils.validation import check_is_fitted
+from ..utils.validation import check_random_state
+from ..utils.validation import _check_feature_names_in
+from ..utils.validation import check_scalar
 from ..utils import _safe_indexing
-from ..utils.validation import (
-    _check_feature_names_in,
-    check_array,
-    check_is_fitted,
-    check_random_state,
-    check_scalar,
-)
-from ._encoders import OneHotEncoder


 class KBinsDiscretizer(TransformerMixin, BaseEstimator):
diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py
index b8cf86f1aaf76..d4cc642a18562 100644
--- a/sklearn/preprocessing/_encoders.py
+++ b/sklearn/preprocessing/_encoders.py
@@ -10,10 +10,13 @@

 from ..base import BaseEstimator, TransformerMixin, _OneToOneFeatureMixin
 from ..utils import check_array, is_scalar_nan
-from ..utils._encode import _check_unknown, _encode, _get_counts, _unique
-from ..utils._mask import _get_mask
 from ..utils.deprecation import deprecated
-from ..utils.validation import _check_feature_names_in, check_is_fitted
+from ..utils.validation import check_is_fitted
+from ..utils.validation import _check_feature_names_in
+from ..utils._mask import _get_mask
+
+from ..utils._encode import _encode, _check_unknown, _unique, _get_counts
+

 __all__ = ["OneHotEncoder", "OrdinalEncoder"]

diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 58ee406675779..e7f4a5e337208 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -6,20 +6,23 @@
 #          Hamzeh Alsalhi
 # License: BSD 3 clause

-import array
+from collections import defaultdict
 import itertools
+import array
 import warnings
-from collections import defaultdict

 import numpy as np
 import scipy.sparse as sp

 from ..base import BaseEstimator, TransformerMixin
-from ..utils import column_or_1d
-from ..utils._encode import _encode, _unique
-from ..utils.multiclass import type_of_target, unique_labels
+
 from ..utils.sparsefuncs import min_max_axis
+from ..utils import column_or_1d
 from ..utils.validation import _num_samples, check_array, check_is_fitted
+from ..utils.multiclass import unique_labels
+from ..utils.multiclass import type_of_target
+from ..utils._encode import _encode, _unique
+

 __all__ = [
     "label_binarize",
diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 15ecf8ba0016e..09306cc56ab11 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -14,15 +14,13 @@

 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
 from ..utils.deprecation import deprecated
+from ..utils.validation import check_is_fitted, FLOAT_DTYPES, _check_sample_weight
+from ..utils.validation import _check_feature_names_in
 from ..utils.stats import _weighted_percentile
-from ..utils.validation import (
-    FLOAT_DTYPES,
-    _check_feature_names_in,
-    _check_sample_weight,
-    check_is_fitted,
-)
+
 from ._csr_polynomial_expansion import _csr_polynomial_expansion

+
 __all__ = [
     "PolynomialFeatures",
     "SplineTransformer",
diff --git a/sklearn/preprocessing/tests/test_common.py b/sklearn/preprocessing/tests/test_common.py
index 9ebef6c000050..98b8dcdfe0e2a 100644
--- a/sklearn/preprocessing/tests/test_common.py
+++ b/sklearn/preprocessing/tests/test_common.py
@@ -1,27 +1,31 @@
 import warnings

-import numpy as np
 import pytest
+import numpy as np
+
 from scipy import sparse

-from sklearn.base import clone
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import (
-    MaxAbsScaler,
-    MinMaxScaler,
-    PowerTransformer,
-    QuantileTransformer,
-    RobustScaler,
-    StandardScaler,
-    maxabs_scale,
-    minmax_scale,
-    power_transform,
-    quantile_transform,
-    robust_scale,
-    scale,
-)
-from sklearn.utils._testing import assert_allclose, assert_array_equal
+
+from sklearn.base import clone
+
+from sklearn.preprocessing import maxabs_scale
+from sklearn.preprocessing import minmax_scale
+from sklearn.preprocessing import scale
+from sklearn.preprocessing import power_transform
+from sklearn.preprocessing import quantile_transform
+from sklearn.preprocessing import robust_scale
+
+from sklearn.preprocessing import MaxAbsScaler
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import PowerTransformer
+from sklearn.preprocessing import QuantileTransformer
+from sklearn.preprocessing import RobustScaler
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose

 iris = load_iris()

diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 894700168bf49..aab28fce336a4 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -4,54 +4,59 @@
 #
 # License: BSD 3 clause

-import itertools
-import re
 import warnings
+import itertools
+import re

 import numpy as np
 import numpy.linalg as la
-import pytest
 from scipy import sparse, stats

-from sklearn import datasets
-from sklearn.base import clone
-from sklearn.exceptions import NotFittedError
+import pytest
+
+from sklearn.utils import gen_batches
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_less
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_allclose_dense_sparse
+from sklearn.utils._testing import skip_if_32bit
+from sklearn.utils._testing import _convert_container
+
+from sklearn.utils.sparsefuncs import mean_variance_axis
+from sklearn.preprocessing import Binarizer
+from sklearn.preprocessing import KernelCenterer
+from sklearn.preprocessing import Normalizer
+from sklearn.preprocessing import normalize
+from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import scale
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.preprocessing import minmax_scale
+from sklearn.preprocessing import QuantileTransformer
+from sklearn.preprocessing import quantile_transform
+from sklearn.preprocessing import MaxAbsScaler
+from sklearn.preprocessing import maxabs_scale
+from sklearn.preprocessing import RobustScaler
+from sklearn.preprocessing import robust_scale
+from sklearn.preprocessing import add_dummy_feature
+from sklearn.preprocessing import PowerTransformer
+from sklearn.preprocessing import power_transform
+from sklearn.preprocessing._data import _handle_zeros_in_scale
+from sklearn.preprocessing._data import BOUNDS_THRESHOLD
 from sklearn.metrics.pairwise import linear_kernel
-from sklearn.model_selection import cross_val_predict
+
+from sklearn.exceptions import NotFittedError
+
+from sklearn.base import clone
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import (
-    Binarizer,
-    KernelCenterer,
-    MaxAbsScaler,
-    MinMaxScaler,
-    Normalizer,
-    PowerTransformer,
-    QuantileTransformer,
-    RobustScaler,
-    StandardScaler,
-    add_dummy_feature,
-    maxabs_scale,
-    minmax_scale,
-    normalize,
-    power_transform,
-    quantile_transform,
-    robust_scale,
-    scale,
-)
-from sklearn.preprocessing._data import BOUNDS_THRESHOLD, _handle_zeros_in_scale
+from sklearn.model_selection import cross_val_predict
 from sklearn.svm import SVR
-from sklearn.utils import gen_batches, shuffle
-from sklearn.utils._testing import (
-    _convert_container,
-    assert_allclose,
-    assert_allclose_dense_sparse,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    assert_array_less,
-    skip_if_32bit,
-)
-from sklearn.utils.sparsefuncs import mean_variance_axis
+from sklearn.utils import shuffle
+
+from sklearn import datasets
+

 iris = datasets.load_iris()

diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py
index cc37af1873300..e1317acb97808 100644
--- a/sklearn/preprocessing/tests/test_discretization.py
+++ b/sklearn/preprocessing/tests/test_discretization.py
@@ -1,15 +1,15 @@
-import warnings
-
-import numpy as np
 import pytest
+import numpy as np
 import scipy.sparse as sp
+import warnings

 from sklearn import clone
-from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder
+from sklearn.preprocessing import KBinsDiscretizer
+from sklearn.preprocessing import OneHotEncoder
 from sklearn.utils._testing import (
-    assert_allclose_dense_sparse,
     assert_array_almost_equal,
     assert_array_equal,
+    assert_allclose_dense_sparse,
 )

 X = [[-2, 1.5, -4, -1], [-1, 2.5, -3, -0.5], [0, 3.5, -2, 0.5], [1, 4.5, -1, 2]]
diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py
index 8679bd07ae6f0..ea32de22cd2f0 100644
--- a/sklearn/preprocessing/tests/test_encoders.py
+++ b/sklearn/preprocessing/tests/test_encoders.py
@@ -1,17 +1,17 @@
 import re

 import numpy as np
-import pytest
 from scipy import sparse
+import pytest

 from sklearn.exceptions import NotFittedError
-from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import _convert_container
 from sklearn.utils import is_scalar_nan
-from sklearn.utils._testing import (
-    _convert_container,
-    assert_allclose,
-    assert_array_equal,
-)
+
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import OrdinalEncoder


 def test_one_hot_encoder_sparse_dense():
diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py
index 6fce8d48fa471..98b8d75da38b4 100644
--- a/sklearn/preprocessing/tests/test_function_transformer.py
+++ b/sklearn/preprocessing/tests/test_function_transformer.py
@@ -1,15 +1,15 @@
 import warnings

-import numpy as np
 import pytest
+import numpy as np
 from scipy import sparse

+from sklearn.utils import _safe_indexing
 from sklearn.preprocessing import FunctionTransformer
-from sklearn.utils import _safe_indexing
 from sklearn.utils._testing import (
-    _convert_container,
-    assert_allclose_dense_sparse,
     assert_array_equal,
+    assert_allclose_dense_sparse,
+    _convert_container,
 )

diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py
index 90efe990eee58..a59cd9b152d27 100644
--- a/sklearn/preprocessing/tests/test_label.py
+++ b/sklearn/preprocessing/tests/test_label.py
@@ -1,27 +1,30 @@
 import numpy as np
+
 import pytest
-from scipy.sparse import (
-    coo_matrix,
-    csc_matrix,
-    csr_matrix,
-    dok_matrix,
-    issparse,
-    lil_matrix,
-)
-from sklearn import datasets
-from sklearn.preprocessing._label import (
-    LabelBinarizer,
-    LabelEncoder,
-    MultiLabelBinarizer,
-    _inverse_binarize_multiclass,
-    _inverse_binarize_thresholding,
-    label_binarize,
-)
-from sklearn.utils import _to_object_array
-from sklearn.utils._testing import assert_array_equal, ignore_warnings
+from scipy.sparse import issparse
+from scipy.sparse import coo_matrix
+from scipy.sparse import csc_matrix
+from scipy.sparse import csr_matrix
+from scipy.sparse import dok_matrix
+from scipy.sparse import lil_matrix
+
 from sklearn.utils.multiclass import type_of_target
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils import _to_object_array
+
+from sklearn.preprocessing._label import LabelBinarizer
+from sklearn.preprocessing._label import MultiLabelBinarizer
+from sklearn.preprocessing._label import LabelEncoder
+from sklearn.preprocessing._label import label_binarize
+
+from sklearn.preprocessing._label import _inverse_binarize_thresholding
+from sklearn.preprocessing._label import _inverse_binarize_multiclass
+
+from sklearn import datasets
+

 iris = datasets.load_iris()

diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py
index 7504cd3bf23c2..2129247125d6c 100644
--- a/sklearn/preprocessing/tests/test_polynomial.py
+++ b/sklearn/preprocessing/tests/test_polynomial.py
@@ -1,10 +1,11 @@
 import numpy as np
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal
 from scipy import sparse
-from scipy.interpolate import BSpline
 from scipy.sparse import random as sparse_random
+from sklearn.utils._testing import assert_array_almost_equal
+from numpy.testing import assert_allclose, assert_array_equal
+from scipy.interpolate import BSpline
 from sklearn.linear_model import LinearRegression
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import (
@@ -12,7 +13,6 @@
     PolynomialFeatures,
     SplineTransformer,
 )
-from sklearn.utils._testing import assert_array_almost_equal


 @pytest.mark.parametrize("est", (PolynomialFeatures, SplineTransformer))
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py
index 7f1a17323e0ee..3b4a5e2236db5 100644
--- a/sklearn/random_projection.py
+++ b/sklearn/random_projection.py
@@ -30,15 +30,17 @@
 from abc import ABCMeta, abstractmethod

 import numpy as np
-import scipy.sparse as sp
 from scipy import linalg
+import scipy.sparse as sp
+
+from .base import BaseEstimator, TransformerMixin
+from .base import _ClassNamePrefixFeaturesOutMixin
-from .base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
-from .exceptions import DataDimensionalityWarning
 from .utils import check_random_state
 from .utils.extmath import safe_sparse_dot
 from .utils.random import sample_without_replacement
 from .utils.validation import check_array, check_is_fitted
+from .exceptions import DataDimensionalityWarning

 __all__ = [
     "SparseRandomProjection",
diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py
index 04a82bd10e27a..1e3684797ebe6 100644
--- a/sklearn/semi_supervised/_label_propagation.py
+++ b/sklearn/semi_supervised/_label_propagation.py
@@ -52,24 +52,23 @@
 Non-Parametric Function Induction in Semi-Supervised Learning. AISTAT 2005
 """

-import warnings
-
 # Authors: Clay Woolam
 #          Utkarsh Upadhyay
 # License: BSD
 from abc import ABCMeta, abstractmethod
+import warnings

 import numpy as np
 from scipy import sparse
 from scipy.sparse import csgraph

 from ..base import BaseEstimator, ClassifierMixin
-from ..exceptions import ConvergenceWarning
 from ..metrics.pairwise import rbf_kernel
 from ..neighbors import NearestNeighbors
 from ..utils.extmath import safe_sparse_dot
 from ..utils.multiclass import check_classification_targets
 from ..utils.validation import check_is_fitted
+from ..exceptions import ConvergenceWarning


 class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py
index 5214d8f86a317..9b21cd273bfb9 100644
--- a/sklearn/semi_supervised/_self_training.py
+++ b/sklearn/semi_supervised/_self_training.py
@@ -2,10 +2,10 @@

 import numpy as np

-from ..base import BaseEstimator, MetaEstimatorMixin, clone
-from ..utils import safe_mask
-from ..utils.metaestimators import available_if
+from ..base import MetaEstimatorMixin, clone, BaseEstimator
 from ..utils.validation import check_is_fitted
+from ..utils.metaestimators import available_if
+from ..utils import safe_mask

 __all__ = ["SelfTrainingClassifier"]

diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py
index 3a862da40a0c7..7fbca2f83522b 100644
--- a/sklearn/semi_supervised/tests/test_label_propagation.py
+++ b/sklearn/semi_supervised/tests/test_label_propagation.py
@@ -1,18 +1,18 @@
 """ test the label propagation module """

-import warnings
-
 import numpy as np
 import pytest
-from numpy.testing import assert_array_almost_equal, assert_array_equal
-from scipy.sparse import issparse
+import warnings

-from sklearn.datasets import make_classification
-from sklearn.exceptions import ConvergenceWarning
+from scipy.sparse import issparse
+from sklearn.semi_supervised import _label_propagation as label_propagation
 from sklearn.metrics.pairwise import rbf_kernel
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import NearestNeighbors
-from sklearn.semi_supervised import _label_propagation as label_propagation
+from sklearn.datasets import make_classification
+from sklearn.exceptions import ConvergenceWarning
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_array_equal

 ESTIMATORS = [
     (label_propagation.LabelPropagation, {"kernel": "rbf"}),
diff --git a/sklearn/semi_supervised/tests/test_self_training.py b/sklearn/semi_supervised/tests/test_self_training.py
index 2b1228640eb74..49198927aa8d9 100644
@@ -1,17 +1,18 @@
 from math import ceil

 import numpy as np
-import pytest
 from numpy.testing import assert_array_equal
+import pytest

-from sklearn.datasets import load_iris, make_blobs
 from sklearn.ensemble import StackingClassifier
 from sklearn.exceptions import NotFittedError
-from sklearn.metrics import accuracy_score
-from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KNeighborsClassifier
-from sklearn.semi_supervised import SelfTrainingClassifier
 from sklearn.svm import SVC
+from sklearn.model_selection import train_test_split
+from sklearn.datasets import load_iris, make_blobs
+from sklearn.metrics import accuracy_score
+
+from sklearn.semi_supervised import SelfTrainingClassifier

 # Author: Oliver Rausch
 # License: BSD 3 clause
diff --git a/sklearn/setup.py b/sklearn/setup.py
index 6e1e18180180c..874bdbbcbed43 100644
--- a/sklearn/setup.py
+++ b/sklearn/setup.py
@@ -1,12 +1,12 @@
-import os
 import sys
+import os

 from sklearn._build_utils import cythonize_extensions


 def configuration(parent_package="", top_path=None):
-    import numpy
     from numpy.distutils.misc_util import Configuration
+    import numpy

     libraries = []
     if os.name == "posix":
diff --git a/sklearn/svm/__init__.py b/sklearn/svm/__init__.py
index 0d64ce24cdd63..f5b4123230f93 100644
--- a/sklearn/svm/__init__.py
+++ b/sklearn/svm/__init__.py
@@ -10,8 +10,8 @@
 # of their respective owners.
 # License: BSD 3 clause (C) INRIA 2010

+from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR
 from ._bounds import l1_min_c
-from ._classes import SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM

 __all__ = [
     "LinearSVC",
diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py
index ebc4edcaa56bc..42b53409fa8b8 100644
--- a/sklearn/svm/_base.py
+++ b/sklearn/svm/_base.py
@@ -1,30 +1,30 @@
-import numbers
 import warnings
+import numbers
 from abc import ABCMeta, abstractmethod

 import numpy as np
 import scipy.sparse as sp

-from ..base import BaseEstimator, ClassifierMixin
-from ..exceptions import ConvergenceWarning, NotFittedError
-from ..preprocessing import LabelEncoder
-from ..utils import check_array, check_random_state, column_or_1d, compute_class_weight
-from ..utils.extmath import safe_sparse_dot
-from ..utils.metaestimators import available_if
-from ..utils.multiclass import _ovr_decision_function, check_classification_targets
-from ..utils.validation import (
-    _check_large_sparse,
-    _check_sample_weight,
-    _num_samples,
-    check_consistent_length,
-    check_is_fitted,
-)
-
 # mypy error: error: Module 'sklearn.svm' has no attribute '_libsvm'
 # (and same for other imports)
-from . import _liblinear as liblinear # type: ignore
 from . import _libsvm as libsvm # type: ignore
+from . import _liblinear as liblinear # type: ignore
 from . import _libsvm_sparse as libsvm_sparse # type: ignore
+from ..base import BaseEstimator, ClassifierMixin
+from ..preprocessing import LabelEncoder
+from ..utils.multiclass import _ovr_decision_function
+from ..utils import check_array, check_random_state
+from ..utils import column_or_1d
+from ..utils import compute_class_weight
+from ..utils.metaestimators import available_if
+from ..utils.extmath import safe_sparse_dot
+from ..utils.validation import check_is_fitted, _check_large_sparse
+from ..utils.validation import _num_samples
+from ..utils.validation import _check_sample_weight, check_consistent_length
+from ..utils.multiclass import check_classification_targets
+from ..exceptions import ConvergenceWarning
+from ..exceptions import NotFittedError
+

 LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr"]

diff --git a/sklearn/svm/_bounds.py b/sklearn/svm/_bounds.py
index 6806793f8a52c..86e973973ca5a 100644
--- a/sklearn/svm/_bounds.py
+++ b/sklearn/svm/_bounds.py
@@ -5,8 +5,8 @@
 import numpy as np

 from ..preprocessing import LabelBinarizer
+from ..utils.validation import check_consistent_length, check_array
 from ..utils.extmath import safe_sparse_dot
-from ..utils.validation import check_array, check_consistent_length


 def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0):
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index 484f222b66db3..3cfbafce876ea 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -1,12 +1,11 @@
-import warnings
-
 import numpy as np
+import warnings

-from ..base import BaseEstimator, OutlierMixin, RegressorMixin
-from ..linear_model._base import LinearClassifierMixin, LinearModel, SparseCoefMixin
-from ..utils.multiclass import check_classification_targets
+from ._base import _fit_liblinear, BaseSVC, BaseLibSVM
+from ..base import BaseEstimator, RegressorMixin, OutlierMixin
+from ..linear_model._base import LinearClassifierMixin, SparseCoefMixin, LinearModel
 from ..utils.validation import _num_samples
-from ._base import BaseLibSVM, BaseSVC, _fit_liblinear
+from ..utils.multiclass import check_classification_targets


 class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
diff --git a/sklearn/svm/_liblinear.pyx b/sklearn/svm/_liblinear.pyx
index 9057f42ce97f2..9dd15e0716c7f 100644
--- a/sklearn/svm/_liblinear.pyx
+++ b/sklearn/svm/_liblinear.pyx
@@ -4,11 +4,10 @@ Wrapper for liblinear

 Author: fabian.pedregosa@inria.fr
 """
-import numpy as np
-
+import numpy as np
 cimport numpy as np

-from ..utils._cython_blas cimport _axpy, _dot, _nrm2, _scal
+from ..utils._cython_blas cimport _dot, _axpy, _scal, _nrm2

 include "_liblinear.pxi"

@@ -51,7 +50,7 @@ def train_wrap(X, np.ndarray[np.float64_t, ndim=1, mode='c'] Y,
         free_problem(problem)
         free_parameter(param)
         raise ValueError(error_msg)
-
+
     cdef BlasFunctions blas_functions
     blas_functions.dot = _dot[double]
     blas_functions.axpy = _axpy[double]
diff --git a/sklearn/svm/_libsvm.pyx b/sklearn/svm/_libsvm.pyx
index 76472005c19ad..4df99724b790a 100644
--- a/sklearn/svm/_libsvm.pyx
+++ b/sklearn/svm/_libsvm.pyx
@@ -28,12 +28,9 @@ Authors
 """

 import warnings
-
-import numpy as np
-
+import numpy as np
 cimport numpy as np
 from libc.stdlib cimport free
-
 from ..utils._cython_blas cimport _dot

 include "_libsvm.pxi"

diff --git a/sklearn/svm/_libsvm_sparse.pyx b/sklearn/svm/_libsvm_sparse.pyx
index fb374eb4a32cd..64fc69364b2ee 100644
--- a/sklearn/svm/_libsvm_sparse.pyx
+++ b/sklearn/svm/_libsvm_sparse.pyx
@@ -1,15 +1,9 @@
 import warnings
-
-import numpy as np
-
+import numpy as np
 cimport numpy as np
-
 from scipy import sparse
-
 from ..exceptions import ConvergenceWarning
-
 from ..utils._cython_blas cimport _dot
-
 np.import_array()

 cdef extern from *:
diff --git a/sklearn/svm/setup.py b/sklearn/svm/setup.py
index c401864d39502..d5f94d8a11181 100644
--- a/sklearn/svm/setup.py
+++ b/sklearn/svm/setup.py
@@ -1,6 +1,5 @@
 import os
 from os.path import join
-
 import numpy

diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py
index f3c91906a25d1..043c86dec86e4 100644
--- a/sklearn/svm/tests/test_bounds.py
+++ b/sklearn/svm/tests/test_bounds.py
@@ -1,12 +1,14 @@
 import numpy as np
-import pytest
 from scipy import sparse as sp
 from scipy import stats

-from sklearn.linear_model import LogisticRegression
-from sklearn.svm import LinearSVC
+import pytest
+
 from sklearn.svm._bounds import l1_min_c
-from sklearn.svm._newrand import bounded_rand_int_wrap, set_seed_wrap
+from sklearn.svm import LinearSVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm._newrand import set_seed_wrap, bounded_rand_int_wrap
+

 dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]]
 sparse_X = sp.csr_matrix(dense_X)
diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py
index b45fe60c2948b..3bb6d0f268d07 100644
--- a/sklearn/svm/tests/test_sparse.py
+++ b/sklearn/svm/tests/test_sparse.py
@@ -1,14 +1,16 @@
-import numpy as np
 import pytest
+
+import numpy as np
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 from scipy import sparse

-from sklearn import base, datasets, linear_model, svm
-from sklearn.datasets import load_digits, make_blobs, make_classification
-from sklearn.exceptions import ConvergenceWarning
+from sklearn import datasets, svm, linear_model, base
+from sklearn.datasets import make_classification, load_digits, make_blobs
 from sklearn.svm.tests import test_svm
-from sklearn.utils._testing import ignore_warnings, skip_if_32bit
+from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.extmath import safe_sparse_dot
+from sklearn.utils._testing import ignore_warnings, skip_if_32bit
+

 # test sample 1
 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py
index cf1587615c5e2..db1d49ab4bcf9 100644
--- a/sklearn/svm/tests/test_svm.py
+++ b/sklearn/svm/tests/test_svm.py
@@ -3,37 +3,32 @@
 TODO: remove hard coded numerical results when possible
 """
+import numpy as np
 import itertools
+import pytest
 import re

-import numpy as np
-import pytest
-from numpy.testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+from numpy.testing import assert_array_equal, assert_array_almost_equal
+from numpy.testing import assert_almost_equal
+from numpy.testing import assert_allclose
 from scipy import sparse
-
-from sklearn import base, datasets, linear_model, metrics, svm
-from sklearn.datasets import make_blobs, make_classification
-from sklearn.exceptions import (
-    ConvergenceWarning,
-    NotFittedError,
-    UndefinedMetricWarning,
-)
+from sklearn import svm, linear_model, datasets, metrics, base
+from sklearn.svm import LinearSVC
+from sklearn.svm import LinearSVR
+from sklearn.model_selection import train_test_split
+from sklearn.datasets import make_classification, make_blobs
 from sklearn.metrics import f1_score
 from sklearn.metrics.pairwise import rbf_kernel
-from sklearn.model_selection import train_test_split
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.validation import _num_samples
+from sklearn.utils import shuffle
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.exceptions import NotFittedError, UndefinedMetricWarning
 from sklearn.multiclass import OneVsRestClassifier

 # mypy error: Module 'sklearn.svm' has no attribute '_libsvm'
 from sklearn.svm import _libsvm # type: ignore
-from sklearn.svm import LinearSVC, LinearSVR
-from sklearn.utils import check_random_state, shuffle
-from sklearn.utils._testing import ignore_warnings
-from sklearn.utils.validation import _num_samples

 # toy sample
 X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
diff --git a/sklearn/tests/random_seed.py b/sklearn/tests/random_seed.py
index a114749e33494..f282f8002f2c5 100644
--- a/sklearn/tests/random_seed.py
+++ b/sklearn/tests/random_seed.py
@@ -8,11 +8,10 @@
 https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed
 """
+import pytest
 from os import environ
 from random import Random

-import pytest
-

 # Passes the main worker's random seeds to workers
 class XDistHooks:
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index d17ccd61ac722..bdbe55c463841 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -1,27 +1,30 @@
 # Author: Gael Varoquaux
 # License: BSD 3 clause

-import pickle
 import re
-import warnings
-
 import numpy as np
-import pytest
 import scipy.sparse as sp
+import pytest
+import warnings

 import sklearn
-from sklearn import config_context, datasets
-from sklearn.base import BaseEstimator, TransformerMixin, clone, is_classifier
-from sklearn.model_selection import GridSearchCV
-from sklearn.pipeline import Pipeline
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_no_warnings
+from sklearn.utils._testing import ignore_warnings
+
+from sklearn.base import BaseEstimator, clone, is_classifier
 from sklearn.svm import SVC
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import GridSearchCV
+
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeRegressor
+from sklearn import datasets
+
+from sklearn.base import TransformerMixin
 from sklearn.utils._mocking import MockDataFrame
-from sklearn.utils._testing import (
-    assert_array_equal,
-    assert_no_warnings,
-    ignore_warnings,
-)
+from sklearn import config_context
+import pickle


 #############################################################################
@@ -650,9 +653,9 @@ def transform(self, X):
" "Got feature names with dtypes: ['int', 'str']" ) - with pytest.warns(FutureWarning, match=msg): + with pytest.warns(FutureWarning, match=msg) as record: trans.fit(df_mixed) # transform on feature names that are mixed also warns: - with pytest.warns(FutureWarning, match=msg): + with pytest.warns(FutureWarning, match=msg) as record: trans.transform(df_mixed) diff --git a/sklearn/tests/test_build.py b/sklearn/tests/test_build.py index 40a960cba6283..d6affa5e4cc78 100644 --- a/sklearn/tests/test_build.py +++ b/sklearn/tests/test_build.py @@ -1,7 +1,6 @@ import os -import textwrap - import pytest +import textwrap from sklearn import __version__ from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 603834765b19f..fb8a6d4f344b2 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -1,52 +1,51 @@ # Authors: Alexandre Gramfort # License: BSD 3 clause -import numpy as np import pytest +import numpy as np from numpy.testing import assert_allclose from scipy import sparse from sklearn.base import BaseEstimator, clone -from sklearn.calibration import ( - CalibratedClassifierCV, - CalibrationDisplay, - _CalibratedClassifier, - _sigmoid_calibration, - _SigmoidCalibration, - calibration_curve, -) -from sklearn.datasets import load_iris, make_blobs, make_classification from sklearn.dummy import DummyClassifier +from sklearn.model_selection import LeaveOneOut, train_test_split + +from sklearn.utils._testing import ( + assert_array_almost_equal, + assert_almost_equal, + assert_array_equal, +) +from sklearn.utils.extmath import softmax +from sklearn.exceptions import NotFittedError +from sklearn.datasets import make_classification, make_blobs, load_iris +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import KFold, cross_val_predict +from sklearn.naive_bayes import MultinomialNB from sklearn.ensemble import ( RandomForestClassifier, RandomForestRegressor, VotingClassifier, ) -from sklearn.exceptions import NotFittedError +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.tree import DecisionTreeClassifier +from sklearn.svm import LinearSVC +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.isotonic import IsotonicRegression from sklearn.feature_extraction import DictVectorizer from sklearn.impute import SimpleImputer -from sklearn.isotonic import IsotonicRegression -from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.metrics import brier_score_loss -from sklearn.model_selection import ( - KFold, - LeaveOneOut, - cross_val_predict, - train_test_split, +from sklearn.calibration import ( + _CalibratedClassifier, + _SigmoidCalibration, + _sigmoid_calibration, + CalibratedClassifierCV, + CalibrationDisplay, + calibration_curve, ) -from sklearn.naive_bayes import MultinomialNB -from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.preprocessing import LabelEncoder, StandardScaler -from sklearn.svm import LinearSVC -from sklearn.tree import DecisionTreeClassifier from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import ( - _convert_container, - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, -) -from sklearn.utils.extmath import softmax +from sklearn.utils._testing import _convert_container + N_SAMPLES = 200 diff --git 
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index 6caa8c80bd56e..b5fc83d1028b3 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -7,44 +7,52 @@
 # License: BSD 3 clause

 import os
-import pkgutil
-import re
-import sys
 import warnings
+import sys
+import re
+import pkgutil
+from inspect import isgenerator, signature, Parameter
+from itertools import product, chain
 from functools import partial
-from inspect import Parameter, isgenerator, signature
-from itertools import chain, product

-import numpy as np
 import pytest
+import numpy as np
+
+from sklearn.utils import all_estimators
+from sklearn.utils._testing import ignore_warnings
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.exceptions import FitFailedWarning
+from sklearn.utils.estimator_checks import check_estimator

 import sklearn
+
 from sklearn.decomposition import PCA
-from sklearn.exceptions import ConvergenceWarning, FitFailedWarning
-from sklearn.experimental import enable_halving_search_cv # noqa
-from sklearn.linear_model import LogisticRegression, Ridge
 from sklearn.linear_model._base import LinearClassifierMixin
-from sklearn.model_selection import (
-    GridSearchCV,
-    HalvingGridSearchCV,
-    HalvingRandomSearchCV,
-    RandomizedSearchCV,
-)
+from sklearn.linear_model import LogisticRegression
+from sklearn.linear_model import Ridge
+from sklearn.model_selection import GridSearchCV
+from sklearn.model_selection import RandomizedSearchCV
+from sklearn.experimental import enable_halving_search_cv # noqa
+from sklearn.model_selection import HalvingGridSearchCV
+from sklearn.model_selection import HalvingRandomSearchCV
 from sklearn.pipeline import make_pipeline
-from sklearn.utils import IS_PYPY, all_estimators
+
+from sklearn.utils import IS_PYPY
 from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags
-from sklearn.utils._testing import SkipTest, ignore_warnings, set_random_state
+from sklearn.utils._testing import (
+    SkipTest,
+    set_random_state,
+)
 from sklearn.utils.estimator_checks import (
     _construct_instance,
-    _get_check_estimator_ids,
     _set_checking_parameters,
+    _get_check_estimator_ids,
     check_class_weight_balanced_linear_classifier,
+    parametrize_with_checks,
     check_dataframe_column_names_consistency,
-    check_estimator,
     check_n_features_in_after_fitting,
     check_transformer_get_feature_names_out,
     check_transformer_get_feature_names_out_pandas,
-    parametrize_with_checks,
 )

diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py
index 88252401cab69..86496d6bd45cf 100644
--- a/sklearn/tests/test_config.py
+++ b/sklearn/tests/test_config.py
@@ -1,10 +1,10 @@
 import time
 from concurrent.futures import ThreadPoolExecutor
+from joblib import Parallel

 import pytest
-from joblib import Parallel

-from sklearn import config_context, get_config, set_config
+from sklearn import get_config, set_config, config_context
 from sklearn.utils.fixes import delayed

diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py
index 7e22a4e5dc294..9ef444c67b3e1 100644
--- a/sklearn/tests/test_discriminant_analysis.py
+++ b/sklearn/tests/test_discriminant_analysis.py
@@ -1,23 +1,26 @@
 import numpy as np
+
 import pytest
+
 from scipy import linalg

-from sklearn.cluster import KMeans
-from sklearn.covariance import LedoitWolf, ShrunkCovariance, ledoit_wolf
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_almost_equal
+
 from sklearn.datasets import make_blobs
-from sklearn.discriminant_analysis import (
-    LinearDiscriminantAnalysis,
-    QuadraticDiscriminantAnalysis,
-    _cov,
-)
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+from sklearn.discriminant_analysis import _cov
+from sklearn.covariance import ledoit_wolf
+from sklearn.cluster import KMeans
+
+from sklearn.covariance import ShrunkCovariance
+from sklearn.covariance import LedoitWolf
+
 from sklearn.preprocessing import StandardScaler
-from sklearn.utils import check_random_state
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-)

 # Data is just 6 separable points in the plane
 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype="f")
diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index bb68e32f0ea34..5e22b425be1ec 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -2,31 +2,31 @@
 #          Raghav RV
 # License: BSD 3 clause

-import importlib
 import inspect
 import warnings
-from inspect import signature
+import importlib
+
 from pkgutil import walk_packages
+from inspect import signature

 import numpy as np
-import pytest

 import sklearn
+from sklearn.utils import IS_PYPY
+from sklearn.utils._testing import check_docstring_parameters
+from sklearn.utils._testing import _get_func_name
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils import all_estimators
+from sklearn.utils.estimator_checks import _enforce_estimator_tags_y
+from sklearn.utils.estimator_checks import _enforce_estimator_tags_x
+from sklearn.utils.estimator_checks import _construct_instance
+from sklearn.utils.deprecation import _is_deprecated
 from sklearn.datasets import make_classification
 from sklearn.linear_model import LogisticRegression
 from sklearn.preprocessing import FunctionTransformer
-from sklearn.utils import IS_PYPY, all_estimators
-from sklearn.utils._testing import (
-    _get_func_name,
-    check_docstring_parameters,
-    ignore_warnings,
-)
-from sklearn.utils.deprecation import _is_deprecated
-from sklearn.utils.estimator_checks import (
-    _construct_instance,
-    _enforce_estimator_tags_x,
-    _enforce_estimator_tags_y,
-)
+
+import pytest
+

 # walk_packages() ignores DeprecationWarnings, now we need to ignore
 # FutureWarnings
diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py
index 3aeadfb6f0a1a..8ee55f2ef6d78 100644
--- a/sklearn/tests/test_docstrings.py
+++ b/sklearn/tests/test_docstrings.py
@@ -1,14 +1,13 @@
-import importlib
-import inspect
-import pkgutil
 import re
 from inspect import signature
+import pkgutil
+import inspect
+import importlib
 from typing import Optional

 import pytest
-
-import sklearn
 from sklearn.utils import all_estimators
+import sklearn

 numpydoc_validation = pytest.importorskip("numpydoc.validate")

@@ -328,8 +327,8 @@ def test_docstring(Estimator, method, request):

 if __name__ == "__main__":
-    import argparse
     import sys
+    import argparse

     parser = argparse.ArgumentParser(description="Validate docstring with numpydoc.")
     parser.add_argument("import_path", help="Import path to validate")
diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py
index c8cc55980dd84..61f8c2e4190e1 100644
--- a/sklearn/tests/test_dummy.py
+++ b/sklearn/tests/test_dummy.py
@@ -1,17 +1,17 @@
-import numpy as np
 import pytest
+
+import numpy as np
 import scipy.sparse as sp

 from sklearn.base import clone
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.stats import _weighted_percentile
+
 from sklearn.dummy import DummyClassifier, DummyRegressor
 from sklearn.exceptions import NotFittedError
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    ignore_warnings,
-)
-from sklearn.utils.stats import _weighted_percentile


 @ignore_warnings
diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py
index e2cb85985f726..5600cf8706e75 100644
--- a/sklearn/tests/test_isotonic.py
+++ b/sklearn/tests/test_isotonic.py
@@ -1,25 +1,27 @@
-import copy
-import pickle
 import warnings
-
 import numpy as np
+import pickle
+import copy
+
 import pytest
-from scipy.special import expit

 from sklearn.datasets import make_regression
 from sklearn.isotonic import (
-    IsotonicRegression,
-    _make_unique,
     check_increasing,
     isotonic_regression,
+    IsotonicRegression,
+    _make_unique,
 )
-from sklearn.utils import shuffle
+
+from sklearn.utils.validation import check_array
 from sklearn.utils._testing import (
     assert_allclose,
-    assert_array_almost_equal,
     assert_array_equal,
+    assert_array_almost_equal,
 )
-from sklearn.utils.validation import check_array
+from sklearn.utils import shuffle
+
+from scipy.special import expit


 def test_permutation_invariance():
diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py
index 29bd22f22122f..bcee4781b5927 100644
--- a/sklearn/tests/test_kernel_approximation.py
+++ b/sklearn/tests/test_kernel_approximation.py
@@ -1,24 +1,20 @@
 import re

 import numpy as np
-import pytest
 from scipy.sparse import csr_matrix
+import pytest
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.metrics.pairwise import kernel_metrics
+from sklearn.kernel_approximation import RBFSampler
+from sklearn.kernel_approximation import AdditiveChi2Sampler
+from sklearn.kernel_approximation import SkewedChi2Sampler
+from sklearn.kernel_approximation import Nystroem
+from sklearn.kernel_approximation import PolynomialCountSketch
 from sklearn.datasets import make_classification
-from sklearn.kernel_approximation import (
-    AdditiveChi2Sampler,
-    Nystroem,
-    PolynomialCountSketch,
-    RBFSampler,
-    SkewedChi2Sampler,
-)
-from sklearn.metrics.pairwise import (
-    chi2_kernel,
-    kernel_metrics,
-    polynomial_kernel,
-    rbf_kernel,
-)
-from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal
+from sklearn.metrics.pairwise import polynomial_kernel, rbf_kernel, chi2_kernel

 # generate data
 rng = np.random.RandomState(0)
diff --git a/sklearn/tests/test_kernel_ridge.py b/sklearn/tests/test_kernel_ridge.py
index e0d2d2cf39574..76a5c77e73be1 100644
--- a/sklearn/tests/test_kernel_ridge.py
+++ b/sklearn/tests/test_kernel_ridge.py
@@ -2,10 +2,13 @@
 import scipy.sparse as sp

 from sklearn.datasets import make_regression
-from sklearn.kernel_ridge import KernelRidge
 from sklearn.linear_model import Ridge
+from sklearn.kernel_ridge import KernelRidge
 from sklearn.metrics.pairwise import pairwise_kernels
-from sklearn.utils._testing import assert_array_almost_equal, ignore_warnings
+from sklearn.utils._testing import ignore_warnings
+
+from sklearn.utils._testing import assert_array_almost_equal
+

 X, y = make_regression(n_features=10, random_state=0)
 Xcsr = sp.csr_matrix(X)
diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py
index 3e1b846f2f7fb..e743741f6fa43 100644
--- a/sklearn/tests/test_metaestimators.py
+++ b/sklearn/tests/test_metaestimators.py
@@ -5,24 +5,23 @@
 import numpy as np
 import pytest

-from sklearn.base import BaseEstimator, is_regressor
+from sklearn.base import BaseEstimator
+from sklearn.base import is_regressor
 from sklearn.datasets import make_classification
-from sklearn.ensemble import BaggingClassifier
-from sklearn.exceptions import NotFittedError
+from sklearn.utils import all_estimators
+from sklearn.utils.estimator_checks import _enforce_estimator_tags_x
+from sklearn.utils.estimator_checks import _enforce_estimator_tags_y
+from sklearn.utils.validation import check_is_fitted
+from sklearn.utils._testing import set_random_state
+from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.feature_selection import RFE, RFECV
-from sklearn.linear_model import LogisticRegression, Ridge
-from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
-from sklearn.pipeline import Pipeline, make_pipeline
-from sklearn.preprocessing import MaxAbsScaler, StandardScaler
+from sklearn.ensemble import BaggingClassifier
+from sklearn.exceptions import NotFittedError
 from sklearn.semi_supervised import SelfTrainingClassifier
-from sklearn.utils import all_estimators
-from sklearn.utils._testing import set_random_state
-from sklearn.utils.estimator_checks import (
-    _enforce_estimator_tags_x,
-    _enforce_estimator_tags_y,
-)
-from sklearn.utils.validation import check_is_fitted
+from sklearn.linear_model import Ridge, LogisticRegression
+from sklearn.preprocessing import StandardScaler, MaxAbsScaler


 class DelegatorData:
diff --git a/sklearn/tests/test_min_dependencies_readme.py b/sklearn/tests/test_min_dependencies_readme.py
index f49df2414cedb..8b2b548c5bf42 100644
--- a/sklearn/tests/test_min_dependencies_readme.py
+++ b/sklearn/tests/test_min_dependencies_readme.py
@@ -2,12 +2,11 @@

 import os
-import platform
 import re
+import platform
 from pathlib import Path

 import pytest
-
 import sklearn
 from sklearn._min_dependencies import dependent_packages
 from sklearn.utils.fixes import parse_version
diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py
index bc748ecfd81b4..a3621414ae793 100644
--- a/sklearn/tests/test_multiclass.py
+++ b/sklearn/tests/test_multiclass.py
@@ -1,39 +1,45 @@
-from re import escape
-
 import numpy as np
-import pytest
 import scipy.sparse as sp
+import pytest
 from numpy.testing import assert_allclose

-from sklearn import datasets, svm
-from sklearn.datasets import load_breast_cancer
-from sklearn.exceptions import NotFittedError
-from sklearn.impute import SimpleImputer
+from re import escape
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._mocking import CheckingClassifier
+from sklearn.multiclass import OneVsRestClassifier
+from sklearn.multiclass import OneVsOneClassifier
+from sklearn.multiclass import OutputCodeClassifier
type_of_target +from sklearn.utils import ( + check_array, + shuffle, +) + +from sklearn.metrics import precision_score +from sklearn.metrics import recall_score + +from sklearn.svm import LinearSVC, SVC +from sklearn.naive_bayes import MultinomialNB from sklearn.linear_model import ( - ElasticNet, - Lasso, LinearRegression, - LogisticRegression, - Perceptron, + Lasso, + ElasticNet, Ridge, + Perceptron, + LogisticRegression, SGDClassifier, ) -from sklearn.metrics import precision_score, recall_score -from sklearn.model_selection import GridSearchCV, cross_val_score -from sklearn.multiclass import ( - OneVsOneClassifier, - OneVsRestClassifier, - OutputCodeClassifier, -) -from sklearn.naive_bayes import MultinomialNB +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.neighbors import KNeighborsClassifier +from sklearn.model_selection import GridSearchCV, cross_val_score from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.svm import SVC, LinearSVC -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.utils import check_array, shuffle -from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import assert_almost_equal, assert_array_equal -from sklearn.utils.multiclass import check_classification_targets, type_of_target +from sklearn.impute import SimpleImputer +from sklearn import svm +from sklearn.exceptions import NotFittedError +from sklearn import datasets +from sklearn.datasets import load_breast_cancer iris = datasets.load_iris() rng = np.random.RandomState(0) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 2e3d50baed0ee..25d209223acc1 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -1,44 +1,36 @@ -import numpy as np import pytest +import numpy as np import scipy.sparse as sp - from joblib import cpu_count + +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal from sklearn import datasets -from sklearn.base import ClassifierMixin, clone -from sklearn.datasets import load_linnerud, make_classification -from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.ensemble import ( - GradientBoostingRegressor, - RandomForestClassifier, - StackingRegressor, -) +from sklearn.base import clone +from sklearn.datasets import make_classification +from sklearn.datasets import load_linnerud +from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier from sklearn.exceptions import NotFittedError -from sklearn.impute import SimpleImputer -from sklearn.linear_model import ( - Lasso, - LogisticRegression, - OrthogonalMatchingPursuit, - Ridge, - SGDClassifier, - SGDRegressor, -) +from sklearn.linear_model import Lasso +from sklearn.linear_model import LogisticRegression +from sklearn.linear_model import OrthogonalMatchingPursuit +from sklearn.linear_model import Ridge +from sklearn.linear_model import SGDClassifier +from sklearn.linear_model import SGDRegressor from sklearn.metrics import jaccard_score, mean_squared_error -from sklearn.model_selection import GridSearchCV from sklearn.multiclass import OneVsRestClassifier -from sklearn.multioutput import ( - ClassifierChain, - MultiOutputClassifier, - MultiOutputRegressor, - RegressorChain, -) -from sklearn.pipeline import make_pipeline +from sklearn.multioutput import ClassifierChain, RegressorChain +from 
sklearn.multioutput import MultiOutputClassifier +from sklearn.multioutput import MultiOutputRegressor from sklearn.svm import LinearSVC +from sklearn.base import ClassifierMixin from sklearn.utils import shuffle -from sklearn.utils._testing import ( - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, -) +from sklearn.model_selection import GridSearchCV +from sklearn.dummy import DummyRegressor, DummyClassifier +from sklearn.pipeline import make_pipeline +from sklearn.impute import SimpleImputer +from sklearn.ensemble import StackingRegressor def test_multi_target_regression(): diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index d6b2eddabb83e..47fd6821ad305 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -1,24 +1,22 @@ import re -import warnings import numpy as np -import pytest import scipy.sparse +import pytest +import warnings from sklearn.datasets import load_digits, load_iris -from sklearn.model_selection import cross_val_score, train_test_split -from sklearn.naive_bayes import ( - BernoulliNB, - CategoricalNB, - ComplementNB, - GaussianNB, - MultinomialNB, -) -from sklearn.utils._testing import ( - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, -) + +from sklearn.model_selection import train_test_split +from sklearn.model_selection import cross_val_score + +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal + +from sklearn.naive_bayes import GaussianNB, BernoulliNB +from sklearn.naive_bayes import MultinomialNB, ComplementNB +from sklearn.naive_bayes import CategoricalNB DISCRETE_NAIVE_BAYES_CLASSES = [BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB] ALL_NAIVE_BAYES_CLASSES = DISCRETE_NAIVE_BAYES_CLASSES + [GaussianNB] diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 2c4e939dcc6db..6913815191ea8 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1,42 +1,43 @@ """ Test the pipeline module. 
""" -import itertools -import re +from tempfile import mkdtemp import shutil import time -from tempfile import mkdtemp +import re +import itertools -import numpy as np import pytest +import numpy as np from scipy import sparse - import joblib -from sklearn.base import BaseEstimator, TransformerMixin, clone, is_classifier -from sklearn.cluster import KMeans -from sklearn.datasets import load_iris -from sklearn.decomposition import PCA, TruncatedSVD -from sklearn.dummy import DummyRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.exceptions import NotFittedError -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_selection import SelectKBest, f_classif -from sklearn.impute import SimpleImputer -from sklearn.linear_model import Lasso, LinearRegression, LogisticRegression -from sklearn.metrics import accuracy_score, r2_score -from sklearn.neighbors import LocalOutlierFactor -from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline, make_union -from sklearn.preprocessing import StandardScaler -from sklearn.svm import SVC + from sklearn.utils._testing import ( + assert_allclose, + assert_array_equal, + assert_array_almost_equal, MinimalClassifier, MinimalRegressor, MinimalTransformer, - assert_allclose, - assert_array_almost_equal, - assert_array_equal, ) +from sklearn.exceptions import NotFittedError from sklearn.utils.validation import check_is_fitted +from sklearn.base import clone, is_classifier, BaseEstimator, TransformerMixin +from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union +from sklearn.svm import SVC +from sklearn.neighbors import LocalOutlierFactor +from sklearn.linear_model import LogisticRegression, Lasso +from sklearn.linear_model import LinearRegression +from sklearn.metrics import accuracy_score, r2_score +from sklearn.cluster import KMeans +from sklearn.feature_selection import SelectKBest, f_classif +from sklearn.dummy import DummyRegressor +from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.datasets import load_iris +from sklearn.preprocessing import StandardScaler +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.impute import SimpleImputer iris = load_iris() diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index 3b768bec829f4..4d21090a3e6fb 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -1,27 +1,25 @@ import functools +from typing import List, Any import warnings -from typing import Any, List import numpy as np -import pytest import scipy.sparse as sp +import pytest -from sklearn.exceptions import DataDimensionalityWarning from sklearn.metrics import euclidean_distances -from sklearn.random_projection import ( - GaussianRandomProjection, - SparseRandomProjection, - _gaussian_random_matrix, - _sparse_random_matrix, - johnson_lindenstrauss_min_dim, -) -from sklearn.utils._testing import ( - assert_allclose, - assert_allclose_dense_sparse, - assert_almost_equal, - assert_array_almost_equal, - assert_array_equal, -) + +from sklearn.random_projection import johnson_lindenstrauss_min_dim +from sklearn.random_projection import _gaussian_random_matrix +from sklearn.random_projection import _sparse_random_matrix +from sklearn.random_projection import SparseRandomProjection +from sklearn.random_projection import GaussianRandomProjection + +from sklearn.utils._testing import 
assert_allclose +from sklearn.utils._testing import assert_allclose_dense_sparse +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.exceptions import DataDimensionalityWarning all_sparse_random_matrix: List[Any] = [_sparse_random_matrix] all_dense_random_matrix: List[Any] = [_gaussian_random_matrix] diff --git a/sklearn/tree/__init__.py b/sklearn/tree/__init__.py index 8cfb42c73e118..f7a8fd183c7cc 100644 --- a/sklearn/tree/__init__.py +++ b/sklearn/tree/__init__.py @@ -3,14 +3,12 @@ classification and regression. """ -from ._classes import ( - BaseDecisionTree, - DecisionTreeClassifier, - DecisionTreeRegressor, - ExtraTreeClassifier, - ExtraTreeRegressor, -) -from ._export import export_graphviz, export_text, plot_tree +from ._classes import BaseDecisionTree +from ._classes import DecisionTreeClassifier +from ._classes import DecisionTreeRegressor +from ._classes import ExtraTreeClassifier +from ._classes import ExtraTreeRegressor +from ._export import export_graphviz, plot_tree, export_text __all__ = [ "BaseDecisionTree", diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index f4be1e16653f2..79257355a4150 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -14,37 +14,39 @@ # # License: BSD 3 clause -import copy import numbers import warnings -from abc import ABCMeta, abstractmethod +import copy +from abc import ABCMeta +from abc import abstractmethod from math import ceil import numpy as np from scipy.sparse import issparse -from ..base import ( - BaseEstimator, - ClassifierMixin, - MultiOutputMixin, - RegressorMixin, - clone, - is_classifier, -) -from ..utils import Bunch, check_random_state, check_scalar, compute_sample_weight +from ..base import BaseEstimator +from ..base import ClassifierMixin +from ..base import clone +from ..base import RegressorMixin +from ..base import is_classifier +from ..base import MultiOutputMixin +from ..utils import Bunch +from ..utils import check_random_state +from ..utils import check_scalar from ..utils.deprecation import deprecated +from ..utils.validation import _check_sample_weight +from ..utils import compute_sample_weight from ..utils.multiclass import check_classification_targets -from ..utils.validation import _check_sample_weight, check_is_fitted -from . import _criterion, _splitter, _tree +from ..utils.validation import check_is_fitted + from ._criterion import Criterion from ._splitter import Splitter -from ._tree import ( - BestFirstTreeBuilder, - DepthFirstTreeBuilder, - Tree, - _build_pruned_tree_ccp, - ccp_pruning_path, -) +from ._tree import DepthFirstTreeBuilder +from ._tree import BestFirstTreeBuilder +from ._tree import Tree +from ._tree import _build_pruned_tree_ccp +from ._tree import ccp_pruning_path +from . import _tree, _splitter, _criterion __all__ = [ "DecisionTreeClassifier", diff --git a/sklearn/tree/_criterion.pxd b/sklearn/tree/_criterion.pxd index 7ffed4845970e..1639b5f4b3195 100644 --- a/sklearn/tree/_criterion.pxd +++ b/sklearn/tree/_criterion.pxd @@ -10,15 +10,13 @@ # See _criterion.pyx for implementation details. 
 import numpy as np
-
 cimport numpy as np

-from ._tree cimport DOUBLE_t  # Type of y, sample_weight
-from ._tree cimport DTYPE_t  # Type of X
-from ._tree cimport INT32_t  # Signed 32 bit integer
-from ._tree cimport SIZE_t  # Type for indices and counters
-from ._tree cimport UINT32_t  # Unsigned 32 bit integer
-
+from ._tree cimport DTYPE_t          # Type of X
+from ._tree cimport DOUBLE_t         # Type of y, sample_weight
+from ._tree cimport SIZE_t           # Type for indices and counters
+from ._tree cimport INT32_t          # Signed 32 bit integer
+from ._tree cimport UINT32_t         # Unsigned 32 bit integer

 cdef class Criterion:
     # The criterion computes the impurity of a node and the reduction of
diff --git a/sklearn/tree/_criterion.pyx b/sklearn/tree/_criterion.pyx
index 72f9be8b02157..57012fcab2296 100644
--- a/sklearn/tree/_criterion.pyx
+++ b/sklearn/tree/_criterion.pyx
@@ -12,20 +12,19 @@
 #
 # License: BSD 3 clause

+from libc.string cimport memcpy
+from libc.string cimport memset
 from libc.math cimport fabs
-from libc.string cimport memcpy, memset

 import numpy as np
-
 cimport numpy as np
-
 np.import_array()

 from numpy.math cimport INFINITY
 from scipy.special.cython_special cimport xlogy

-from ._utils cimport WeightedMedianCalculator, log
-
+from ._utils cimport log
+from ._utils cimport WeightedMedianCalculator

 # EPSILON is used in the Poisson criterion
 cdef double EPSILON = 10 * np.finfo('double').eps
diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py
index 03bb773901c36..4e2e8b58cc370 100644
--- a/sklearn/tree/_export.py
+++ b/sklearn/tree/_export.py
@@ -16,10 +16,13 @@

 import numpy as np

-from ..base import is_classifier
 from ..utils.validation import check_is_fitted
-from . import DecisionTreeClassifier, _criterion, _tree
-from ._reingold_tilford import Tree, buchheim
+from ..base import is_classifier
+
+from . import _criterion
+from . import _tree
+from ._reingold_tilford import buchheim, Tree
+from . import DecisionTreeClassifier


 def _color_brew(n):
diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index d77c5d5cb1cbe..cf01fed9cfd7d 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -10,16 +10,15 @@
 # See _splitter.pyx for details.
 import numpy as np
-
 cimport numpy as np

 from ._criterion cimport Criterion

-from ._tree cimport DOUBLE_t  # Type of y, sample_weight
-from ._tree cimport DTYPE_t  # Type of X
-from ._tree cimport INT32_t  # Signed 32 bit integer
-from ._tree cimport SIZE_t  # Type for indices and counters
-from ._tree cimport UINT32_t  # Unsigned 32 bit integer
+from ._tree cimport DTYPE_t          # Type of X
+from ._tree cimport DOUBLE_t         # Type of y, sample_weight
+from ._tree cimport SIZE_t           # Type for indices and counters
+from ._tree cimport INT32_t          # Signed 32 bit integer
+from ._tree cimport UINT32_t         # Unsigned 32 bit integer

 cdef struct SplitRecord:
     # Data to track sample split
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 5e0779003a436..5d0b6204deb13 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -11,22 +11,25 @@
 #
 # License: BSD 3 clause

-from libc.stdlib cimport free, qsort
-from libc.string cimport memcpy, memset
-
 from ._criterion cimport Criterion

-import numpy as np
+from libc.stdlib cimport free
+from libc.stdlib cimport qsort
+from libc.string cimport memcpy
+from libc.string cimport memset

+import numpy as np
 cimport numpy as np
-
 np.import_array()

 from scipy.sparse import csc_matrix

+from ._utils cimport log
+from ._utils cimport rand_int
+from ._utils cimport rand_uniform
+from ._utils cimport RAND_R_MAX
+from ._utils cimport safe_realloc
 from ..utils._sorting cimport simultaneous_sort

-from ._utils cimport RAND_R_MAX, log, rand_int, rand_uniform, safe_realloc
-

 cdef double INFINITY = np.inf
diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd
index 1639ba9353561..0874187ee98ae 100644
--- a/sklearn/tree/_tree.pxd
+++ b/sklearn/tree/_tree.pxd
@@ -11,7 +11,6 @@
 # See _tree.pyx for details.

 import numpy as np
-
 cimport numpy as np

 ctypedef np.npy_float32 DTYPE_t  # Type of X
@@ -20,8 +19,8 @@
 ctypedef np.npy_intp SIZE_t  # Type for indices and counters
 ctypedef np.npy_int32 INT32_t  # Signed 32 bit integer
 ctypedef np.npy_uint32 UINT32_t  # Unsigned 32 bit integer

-from ._splitter cimport SplitRecord, Splitter
-
+from ._splitter cimport Splitter
+from ._splitter cimport SplitRecord

 cdef struct Node:
     # Base storage structure for the nodes in a Tree object
diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx
index d8e9d889bcf57..85c44b5eaf9b8 100644
--- a/sklearn/tree/_tree.pyx
+++ b/sklearn/tree/_tree.pyx
@@ -13,26 +13,28 @@
 # License: BSD 3 clause

 from cpython cimport Py_INCREF, PyObject, PyTypeObject
+
+from libc.stdlib cimport free
 from libc.math cimport fabs
+from libc.string cimport memcpy
+from libc.string cimport memset
 from libc.stdint cimport SIZE_MAX
-from libc.stdlib cimport free
-from libc.string cimport memcpy, memset
-from libcpp cimport bool
-from libcpp.algorithm cimport pop_heap, push_heap
 from libcpp.vector cimport vector
+from libcpp.algorithm cimport pop_heap
+from libcpp.algorithm cimport push_heap
+from libcpp cimport bool

 import struct

 import numpy as np
-
 cimport numpy as np
-
 np.import_array()

-from scipy.sparse import csr_matrix, issparse
-
-from ._utils cimport safe_realloc, sizet_ptr_to_ndarray
+from scipy.sparse import issparse
+from scipy.sparse import csr_matrix
+from ._utils cimport safe_realloc
+from ._utils cimport sizet_ptr_to_ndarray

 cdef extern from "numpy/arrayobject.h":
     object PyArray_NewFromDescr(PyTypeObject* subtype, np.dtype descr,
@@ -57,7 +59,6 @@ cdef extern from "<stack>" namespace "std" nogil:

 from numpy import float32 as DTYPE
 from numpy import float64 as DOUBLE
-

 cdef double INFINITY = np.inf
 cdef double EPSILON = np.finfo('double').eps
diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd
index d329fd5d0b6a4..fe4aca67d7b52 100644
--- a/sklearn/tree/_utils.pxd
+++ b/sklearn/tree/_utils.pxd
@@ -9,11 +9,9 @@
 # See _utils.pyx for details.

 import numpy as np
-
 cimport numpy as np
-
-from ..neighbors._quad_tree cimport Cell
 from ._tree cimport Node
+from ..neighbors._quad_tree cimport Cell

 ctypedef np.npy_float32 DTYPE_t  # Type of X
 ctypedef np.npy_float64 DOUBLE_t  # Type of y, sample_weight
diff --git a/sklearn/tree/_utils.pyx b/sklearn/tree/_utils.pyx
index 02b0897faa45f..ba4c0f716a985 100644
--- a/sklearn/tree/_utils.pyx
+++ b/sklearn/tree/_utils.pyx
@@ -7,13 +7,13 @@
 #
 # License: BSD 3 clause

+from libc.stdlib cimport free
+from libc.stdlib cimport malloc
+from libc.stdlib cimport realloc
 from libc.math cimport log as ln
-from libc.stdlib cimport free, malloc, realloc

 import numpy as np
-
 cimport numpy as np
-
 np.import_array()

 from ..utils._random cimport our_rand_r
diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py
index f2cbfb0ce676e..d3b082a927048 100644
--- a/sklearn/tree/tests/test_export.py
+++ b/sklearn/tree/tests/test_export.py
@@ -1,23 +1,18 @@
 """
 Testing for export functions of decision trees (sklearn.tree.export).
 """
-from io import StringIO
 from re import finditer, search
 from textwrap import dedent

-import pytest
 from numpy.random import RandomState
+import pytest

 from sklearn.base import is_classifier
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.tree import export_graphviz, plot_tree, export_text
+from io import StringIO
 from sklearn.exceptions import NotFittedError
-from sklearn.tree import (
-    DecisionTreeClassifier,
-    DecisionTreeRegressor,
-    export_graphviz,
-    export_text,
-    plot_tree,
-)

 # toy sample
 X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
diff --git a/sklearn/tree/tests/test_reingold_tilford.py b/sklearn/tree/tests/test_reingold_tilford.py
index bf0ce3ce2cffc..8f38c997a48d7 100644
--- a/sklearn/tree/tests/test_reingold_tilford.py
+++ b/sklearn/tree/tests/test_reingold_tilford.py
@@ -1,7 +1,6 @@
 import numpy as np
 import pytest
-
-from sklearn.tree._reingold_tilford import Tree, buchheim
+from sklearn.tree._reingold_tilford import buchheim, Tree

 simple_tree = Tree("", 0, Tree("", 1), Tree("", 2))
diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py
index b0b3a52a2e251..a0c2f978ed147 100644
--- a/sklearn/tree/tests/test_tree.py
+++ b/sklearn/tree/tests/test_tree.py
@@ -2,50 +2,64 @@
 Testing for the tree module (sklearn.tree).
 """
 import copy
-import copyreg
-import io
 import pickle
-import struct
 from itertools import product
+import struct
+import io
+import copyreg

-import numpy as np
 import pytest
-from joblib.numpy_pickle import NumpyPickler
+import numpy as np
 from numpy.testing import assert_allclose
-from scipy.sparse import coo_matrix, csc_matrix, csr_matrix
+from scipy.sparse import csc_matrix
+from scipy.sparse import csr_matrix
+from scipy.sparse import coo_matrix

 import joblib
-from sklearn import datasets, tree
+from joblib.numpy_pickle import NumpyPickler
+
+from sklearn.random_projection import _sparse_random_matrix
+
 from sklearn.dummy import DummyRegressor
-from sklearn.exceptions import NotFittedError
-from sklearn.metrics import accuracy_score, mean_poisson_deviance, mean_squared_error
+
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import mean_poisson_deviance
+
 from sklearn.model_selection import train_test_split
-from sklearn.random_projection import _sparse_random_matrix
-from sklearn.tree import (
-    DecisionTreeClassifier,
-    DecisionTreeRegressor,
-    ExtraTreeClassifier,
-    ExtraTreeRegressor,
-)
-from sklearn.tree._classes import CRITERIA_CLF, CRITERIA_REG
-from sklearn.tree._tree import NODE_DTYPE, TREE_LEAF, TREE_UNDEFINED
-from sklearn.tree._tree import Tree as CythonTree
-from sklearn.tree._tree import (
-    _check_n_classes,
-    _check_node_ndarray,
-    _check_value_ndarray,
-)
-from sklearn.utils import _IS_32BIT, compute_sample_weight
-from sklearn.utils._testing import (
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    create_memmap_backed_data,
-    ignore_warnings,
-    skip_if_32bit,
-)
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import create_memmap_backed_data
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils._testing import skip_if_32bit
+
 from sklearn.utils.estimator_checks import check_sample_weights_invariance
 from sklearn.utils.validation import check_random_state
+from sklearn.utils import _IS_32BIT
+
+from sklearn.exceptions import NotFittedError
+
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.tree import ExtraTreeClassifier
+from sklearn.tree import ExtraTreeRegressor
+
+from sklearn import tree
+from sklearn.tree._tree import TREE_LEAF, TREE_UNDEFINED
+from sklearn.tree._tree import Tree as CythonTree
+from sklearn.tree._tree import _check_n_classes
+from sklearn.tree._tree import _check_value_ndarray
+from sklearn.tree._tree import _check_node_ndarray
+from sklearn.tree._tree import NODE_DTYPE
+
+from sklearn.tree._classes import CRITERIA_CLF
+from sklearn.tree._classes import CRITERIA_REG
+from sklearn import datasets
+
+from sklearn.utils import compute_sample_weight
+

 CLF_CRITERIONS = ("gini", "log_loss")
 REG_CRITERIONS = ("squared_error", "absolute_error", "friedman_mse", "poisson")
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 13d9f56a880e6..aa056e92b3d12 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -1,45 +1,48 @@
 """
 The :mod:`sklearn.utils` module includes various utilities.
 """
+import pkgutil
 import inspect
+from importlib import import_module
+from operator import itemgetter
+from collections.abc import Sequence
+from contextlib import contextmanager
+from itertools import compress
+from itertools import islice
 import math
 import numbers
-import pkgutil
 import platform
 import struct
 import timeit
-import warnings
-from collections.abc import Sequence
-from contextlib import contextmanager, suppress
-from importlib import import_module
-from itertools import compress, islice
-from operator import itemgetter
 from pathlib import Path
+from contextlib import suppress
+import warnings

 import numpy as np
 from scipy.sparse import issparse

-from .. import get_config
-from ..exceptions import DataConversionWarning
-from . import _joblib
-from ._bunch import Bunch
-from ._estimator_html_repr import estimator_html_repr
+from .murmurhash import murmurhash3_32
 from .class_weight import compute_class_weight, compute_sample_weight
+from . import _joblib
+from ..exceptions import DataConversionWarning
 from .deprecation import deprecated
 from .fixes import parse_version, threadpool_info
-from .murmurhash import murmurhash3_32
+from ._estimator_html_repr import estimator_html_repr
 from .validation import (
     as_float_array,
     assert_all_finite,
+    check_random_state,
+    column_or_1d,
     check_array,
     check_consistent_length,
-    check_random_state,
-    check_scalar,
-    check_symmetric,
     check_X_y,
-    column_or_1d,
     indexable,
+    check_symmetric,
+    check_scalar,
 )
+from .. import get_config
+from ._bunch import Bunch
+

 # Do not deprecate parallel_backend and register_parallel_backend as they are
 # needed to tune `scikit-learn` behavior and have different effect if called
@@ -1168,14 +1171,14 @@ def all_estimators(type_filter=None):
         and ``class`` is the actual type of the class.
     """
     # lazy import to avoid circular imports from sklearn.base
+    from ._testing import ignore_warnings
     from ..base import (
         BaseEstimator,
         ClassifierMixin,
-        ClusterMixin,
         RegressorMixin,
         TransformerMixin,
+        ClusterMixin,
     )
-    from ._testing import ignore_warnings

     def is_abstract(c):
         if not (hasattr(c, "__abstractmethods__")):
diff --git a/sklearn/utils/_cython_blas.pyx b/sklearn/utils/_cython_blas.pyx
index ed188eb7fbbd1..c15e66ee02ce1 100644
--- a/sklearn/utils/_cython_blas.pyx
+++ b/sklearn/utils/_cython_blas.pyx
@@ -1,28 +1,17 @@
 from cython cimport floating
-from scipy.linalg.cython_blas cimport (
-    dasum,
-    daxpy,
-    dcopy,
-    ddot,
-    dgemm,
-    dgemv,
-    dger,
-    dnrm2,
-    drot,
-    drotg,
-    dscal,
-    sasum,
-    saxpy,
-    scopy,
-    sdot,
-    sgemm,
-    sgemv,
-    sger,
-    snrm2,
-    srot,
-    srotg,
-    sscal,
-)
+
+from scipy.linalg.cython_blas cimport sdot, ddot
+from scipy.linalg.cython_blas cimport sasum, dasum
+from scipy.linalg.cython_blas cimport saxpy, daxpy
+from scipy.linalg.cython_blas cimport snrm2, dnrm2
+from scipy.linalg.cython_blas cimport scopy, dcopy
+from scipy.linalg.cython_blas cimport sscal, dscal
+from scipy.linalg.cython_blas cimport srotg, drotg
+from scipy.linalg.cython_blas cimport srot, drot
+from scipy.linalg.cython_blas cimport sgemv, dgemv
+from scipy.linalg.cython_blas cimport sger, dger
+from scipy.linalg.cython_blas cimport sgemm, dgemm
+

 ################
 # BLAS Level 1 #
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index 5affa4616be01..de48890fcaacf 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -1,9 +1,8 @@
-from collections import Counter
 from contextlib import suppress
+from collections import Counter
 from typing import NamedTuple

 import numpy as np
-
 from . import is_scalar_nan
diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py
index 4f10600053d1b..f8911b5c38b08 100644
--- a/sklearn/utils/_estimator_html_repr.py
+++ b/sklearn/utils/_estimator_html_repr.py
@@ -1,7 +1,8 @@
-import html
-from contextlib import closing, suppress
+from contextlib import closing
+from contextlib import suppress
 from io import StringIO
 from string import Template
+import html

 from .. import config_context
diff --git a/sklearn/utils/_fast_dict.pxd b/sklearn/utils/_fast_dict.pxd
index df2c76b8377e4..1bcc149a54ab5 100644
--- a/sklearn/utils/_fast_dict.pxd
+++ b/sklearn/utils/_fast_dict.pxd
@@ -5,9 +5,10 @@ Uses C++ map containers for fast dict-like behavior with keys being integers,
 and values float.
 """
+from libcpp.map cimport map as cpp_map
+
 # Import the C-level symbols of numpy
 cimport numpy as cnp
-from libcpp.map cimport map as cpp_map

 ctypedef cnp.float64_t DTYPE_t
diff --git a/sklearn/utils/_fast_dict.pyx b/sklearn/utils/_fast_dict.pyx
index 7651d85b1a6a1..6d7e62eefc07f 100644
--- a/sklearn/utils/_fast_dict.pyx
+++ b/sklearn/utils/_fast_dict.pyx
@@ -8,16 +8,14 @@ integers, and values float.
 cimport cython

 # C++
-from cython.operator cimport dereference as deref
-from cython.operator cimport predecrement as dec
-from cython.operator cimport preincrement as inc
-from libcpp.map cimport map as cpp_map
+from cython.operator cimport dereference as deref, preincrement as inc, \
+    predecrement as dec
 from libcpp.utility cimport pair
+from libcpp.map cimport map as cpp_map

 import numpy as np

 # Import the C-level symbols of numpy
-
 cimport numpy as np

 # Numpy must be initialized. When using numpy from C or Cython you must
@@ -70,7 +68,7 @@ cdef class IntFloatDict:
     #     while it != end:
    #         yield deref(it).first, deref(it).second
     #         inc(it)
-
+    
     def __iter__(self):
         cdef int size = self.my_map.size()
         cdef ITYPE_t [:] keys = np.empty(size, dtype=np.intp)
@@ -149,3 +147,4 @@ def argmin(IntFloatDict d):
             min_key = deref(it).first
         inc(it)
     return min_key, min_value
+
diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py
index 590fdc6170c64..8cbe084c94992 100644
--- a/sklearn/utils/_joblib.py
+++ b/sklearn/utils/_joblib.py
@@ -5,20 +5,13 @@
     # joblib imports may raise DeprecationWarning on certain Python
     # versions
     import joblib
-
-    from joblib import (
-        Memory,
-        Parallel,
-        __version__,
-        cpu_count,
-        delayed,
-        dump,
-        effective_n_jobs,
-        hash,
-        load,
-        logger,
-        parallel_backend,
-        register_parallel_backend,
-    )
+    from joblib import logger
+    from joblib import dump, load
+    from joblib import __version__
+    from joblib import effective_n_jobs
+    from joblib import hash
+    from joblib import cpu_count, Parallel, Memory, delayed
+    from joblib import parallel_backend, register_parallel_backend


 __all__ = [
diff --git a/sklearn/utils/_logistic_sigmoid.pyx b/sklearn/utils/_logistic_sigmoid.pyx
index 8e139264fa313..c2ba685dbfcbd 100644
--- a/sklearn/utils/_logistic_sigmoid.pyx
+++ b/sklearn/utils/_logistic_sigmoid.pyx
@@ -1,7 +1,6 @@
-from libc.math cimport exp, log
+from libc.math cimport log, exp

 import numpy as np
-
 cimport numpy as np

 np.import_array()
diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py
index 07332bf1edbd4..d57cf839d962f 100644
--- a/sklearn/utils/_mask.py
+++ b/sklearn/utils/_mask.py
@@ -1,7 +1,6 @@
-from contextlib import suppress
-
 import numpy as np
 from scipy import sparse as sp
+from contextlib import suppress

 from . import is_scalar_nan
 from .fixes import _object_dtype_isnan
diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py
index 7edf9b7fc9ac3..c7451dce1fbc5 100644
--- a/sklearn/utils/_mocking.py
+++ b/sklearn/utils/_mocking.py
@@ -1,7 +1,8 @@
 import numpy as np

 from ..base import BaseEstimator, ClassifierMixin
-from .validation import _check_sample_weight, _num_samples, check_array, check_is_fitted
+from .validation import _check_sample_weight, _num_samples, check_array
+from .validation import check_is_fitted


 class ArraySlicingWrapper:
diff --git a/sklearn/utils/_pprint.py b/sklearn/utils/_pprint.py
index cea1510746cbe..c96b1ce764c4a 100644
--- a/sklearn/utils/_pprint.py
+++ b/sklearn/utils/_pprint.py
@@ -67,8 +67,8 @@
 import pprint
 from collections import OrderedDict

-from .._config import get_config
 from ..base import BaseEstimator
+from .._config import get_config
 from . import is_scalar_nan
diff --git a/sklearn/utils/_random.pxd b/sklearn/utils/_random.pxd
index 51b48deacdf72..73b5505bc0e80 100644
--- a/sklearn/utils/_random.pxd
+++ b/sklearn/utils/_random.pxd
@@ -4,9 +4,7 @@

 import numpy as np
-
 cimport numpy as cnp
-
 ctypedef cnp.npy_uint32 UINT32_t

 cdef inline UINT32_t DEFAULT_SEED = 1
diff --git a/sklearn/utils/_random.pyx b/sklearn/utils/_random.pyx
index 5fec389608a6f..0fb3822807eca 100644
--- a/sklearn/utils/_random.pyx
+++ b/sklearn/utils/_random.pyx
@@ -13,14 +13,11 @@ The module contains:
 cimport cython

 import numpy as np
-
 cimport numpy as cnp
-
 cnp.import_array()

 from . import check_random_state

-
 cdef UINT32_t DEFAULT_SEED = 1
diff --git a/sklearn/utils/_readonly_array_wrapper.pyx b/sklearn/utils/_readonly_array_wrapper.pyx
index 842ab3a0eba94..2c81330df2eb0 100644
--- a/sklearn/utils/_readonly_array_wrapper.pyx
+++ b/sklearn/utils/_readonly_array_wrapper.pyx
@@ -12,12 +12,12 @@ This way, we can use it on arrays that we don't touch.
 # TODO: Remove with Cython >= 3.0 which supports const memoryviews for fused types.

 from cpython cimport Py_buffer
-from cpython.buffer cimport PyBUF_WRITABLE, PyBuffer_Release, PyObject_GetBuffer
+from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_WRITABLE

 import numpy as np
-
 cimport numpy as np
+

 np.import_array()
diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py
index b387e2fd6f648..dbef0d5bb4bcf 100644
--- a/sklearn/utils/_show_versions.py
+++ b/sklearn/utils/_show_versions.py
@@ -7,9 +7,10 @@

 import platform
 import sys
-
-from .. import __version__
 from ..utils.fixes import threadpool_info
+from .. import __version__
+
+
 from ._openmp_helpers import _openmp_parallelism_enabled


@@ -67,7 +68,7 @@ def _get_deps_info():
     # therefore on our CI.
     # https://github.com/conda-forge/conda-forge-pinning-feedstock/issues/2089
     try:
-        from pkg_resources import DistributionNotFound, get_distribution
+        from pkg_resources import get_distribution, DistributionNotFound

         for modname in deps:
             try:
@@ -81,7 +82,7 @@ def _get_deps_info():
                 deps_info[modname] = None
     else:
-        from importlib.metadata import PackageNotFoundError, version
+        from importlib.metadata import version, PackageNotFoundError

         for modname in deps:
             try:
diff --git a/sklearn/utils/_sorting.pxd b/sklearn/utils/_sorting.pxd
index 19f1594e3fe55..412d67c479fac 100644
--- a/sklearn/utils/_sorting.pxd
+++ b/sklearn/utils/_sorting.pxd
@@ -1,7 +1,6 @@
-from cython cimport floating
-
 from ._typedefs cimport DTYPE_t, ITYPE_t
+from cython cimport floating

 cdef int simultaneous_sort(
     floating *dist,
diff --git a/sklearn/utils/_sorting.pyx b/sklearn/utils/_sorting.pyx
index 22da3b95d79f2..367448b5cb91b 100644
--- a/sklearn/utils/_sorting.pyx
+++ b/sklearn/utils/_sorting.pyx
@@ -1,6 +1,5 @@
 from cython cimport floating

-
 cdef inline void dual_swap(
     floating* darr,
     ITYPE_t *iarr,
diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py
index 72e8e700c9525..453f3437307a9 100644
--- a/sklearn/utils/_testing.py
+++ b/sklearn/utils/_testing.py
@@ -10,25 +10,27 @@
 #          Giorgio Patrini
 #          Thierry Guillemot
 # License: BSD 3 clause
-import atexit
-import contextlib
-import functools
-import inspect
 import os
 import os.path as op
-import re
-import shutil
+import inspect
+import warnings
 import sys
+import functools
 import tempfile
-import unittest
-import warnings
+from subprocess import check_output, STDOUT, CalledProcessError
+from subprocess import TimeoutExpired
+import re
+import contextlib
 from collections.abc import Iterable
+
+import scipy as sp
 from functools import wraps
 from inspect import signature
-from subprocess import STDOUT, CalledProcessError, TimeoutExpired, check_output
-from unittest import TestCase

-import scipy as sp
+import shutil
+import atexit
+import unittest
+from unittest import TestCase

 # WindowsError only exist on Windows
 try:
@@ -36,26 +38,29 @@
 except NameError:
     WindowsError = None

-import numpy as np
 from numpy.testing import assert_allclose as np_assert_allclose
-from numpy.testing import (
-    assert_almost_equal,
-    assert_approx_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    assert_array_less,
-)
-
+from numpy.testing import assert_almost_equal
+from numpy.testing import assert_approx_equal
+from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_almost_equal
+from numpy.testing import assert_array_less
+import numpy as np
 import joblib
+
 import sklearn
 from sklearn.utils import (
-    _IS_32BIT,
     IS_PYPY,
-    _in_unstable_openblas_configuration,
+    _IS_32BIT,
     deprecated,
+    _in_unstable_openblas_configuration,
 )
 from sklearn.utils.multiclass import check_classification_targets
-from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
+from sklearn.utils.validation import (
+    check_array,
+    check_is_fitted,
+    check_X_y,
+)
+

 __all__ = [
     "assert_raises",
diff --git a/sklearn/utils/_vector_sentinel.pxd b/sklearn/utils/_vector_sentinel.pxd
index 3f9ca3120002d..5fa0f6ad8d00a 100644
--- a/sklearn/utils/_vector_sentinel.pxd
+++ b/sklearn/utils/_vector_sentinel.pxd
@@ -1,7 +1,7 @@
 cimport numpy as np
-from libcpp.vector cimport vector

-from ..utils._typedefs cimport DTYPE_t, INT32TYPE_t, INT64TYPE_t, ITYPE_t
+from libcpp.vector cimport vector
+from ..utils._typedefs cimport ITYPE_t, DTYPE_t, INT32TYPE_t, INT64TYPE_t

 ctypedef fused vector_typed:
     vector[DTYPE_t]
diff --git a/sklearn/utils/_vector_sentinel.pyx b/sklearn/utils/_vector_sentinel.pyx
index 0588a55bcd7f8..0938ada0f56c1 100644
--- a/sklearn/utils/_vector_sentinel.pyx
+++ b/sklearn/utils/_vector_sentinel.pyx
@@ -1,8 +1,8 @@
-cimport numpy as np
-from cpython.ref cimport Py_INCREF
 from cython.operator cimport dereference as deref
+from cpython.ref cimport Py_INCREF
+cimport numpy as np

-from ._typedefs cimport DTYPECODE, INT32TYPECODE, INT64TYPECODE, ITYPECODE
+from ._typedefs cimport DTYPECODE, ITYPECODE, INT32TYPECODE, INT64TYPECODE

 np.import_array()
diff --git a/sklearn/utils/arrayfuncs.pyx b/sklearn/utils/arrayfuncs.pyx
index f6509b81dabcb..f494499923c71 100644
--- a/sklearn/utils/arrayfuncs.pyx
+++ b/sklearn/utils/arrayfuncs.pyx
@@ -4,15 +4,13 @@ Small collection of auxiliary functions that operate on arrays
 """

 cimport numpy as np
-
-import numpy as np
-
+import numpy as np
 cimport cython
 from cython cimport floating
-from libc.float cimport DBL_MAX, FLT_MAX
 from libc.math cimport fabs
+from libc.float cimport DBL_MAX, FLT_MAX

-from ._cython_blas cimport _copy, _rot, _rotg
+from ._cython_blas cimport _copy, _rotg, _rot

 ctypedef np.float64_t DOUBLE
@@ -56,7 +54,7 @@ def cholesky_delete(np.ndarray[floating, ndim=2] L, int go_out):
         floating c, s
         floating *L1
         int i
-
+    
     if floating is float:
         m /= sizeof(float)
     else:
diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py
index ef0e7c13612f3..bdcee747129d9 100644
--- a/sklearn/utils/class_weight.py
+++ b/sklearn/utils/class_weight.py
@@ -3,6 +3,7 @@
 # License: BSD 3 clause

 import numpy as np
+
 from scipy import sparse
diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py
index a1c941bbefc09..2ee07154dc49b 100644
--- a/sklearn/utils/deprecation.py
+++ b/sklearn/utils/deprecation.py
@@ -1,5 +1,6 @@
-import functools
 import warnings
+import functools
+

 __all__ = ["deprecated"]
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 4c538bab992e3..33cd54a5cb4ac 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1,7 +1,7 @@
-import pickle
-import re
 import types
 import warnings
+import pickle
+import re
 from copy import deepcopy
 from functools import partial, wraps
 from inspect import signature
@@ -9,59 +9,68 @@
 import numpy as np
 from scipy import sparse
 from scipy.stats import rankdata
-
 import joblib

+from . import IS_PYPY
 from .. import config_context
+from ._testing import _get_args
+from ._testing import assert_raise_message
+from ._testing import assert_array_equal
+from ._testing import assert_array_almost_equal
+from ._testing import assert_allclose
+from ._testing import assert_allclose_dense_sparse
+from ._testing import assert_array_less
+from ._testing import set_random_state
+from ._testing import SkipTest
+from ._testing import ignore_warnings
+from ._testing import create_memmap_backed_data
+from ._testing import raises
+from . import is_scalar_nan
+
+from ..linear_model import LinearRegression
+from ..linear_model import LogisticRegression
+from ..linear_model import RANSACRegressor
+from ..linear_model import Ridge
+from ..linear_model import SGDRegressor
+
 from ..base import (
-    ClusterMixin,
-    RegressorMixin,
     clone,
+    ClusterMixin,
     is_classifier,
-    is_outlier_detector,
     is_regressor,
+    is_outlier_detector,
+    RegressorMixin,
 )
+
+from ..metrics import accuracy_score, adjusted_rand_score, f1_score
+from ..random_projection import BaseRandomProjection
+from ..feature_selection import SelectKBest
+from ..feature_selection import SelectFromModel
+from ..pipeline import make_pipeline
+from ..exceptions import DataConversionWarning
+from ..exceptions import NotFittedError
+from ..exceptions import SkipTestWarning
+from ..model_selection import train_test_split
+from ..model_selection import ShuffleSplit
+from ..model_selection._validation import _safe_split
+from ..metrics.pairwise import rbf_kernel, linear_kernel, pairwise_distances
+from ..utils.fixes import threadpool_info
+from ..utils.validation import check_is_fitted
+
+from . import shuffle
+from ._tags import (
+    _DEFAULT_TAGS,
+    _safe_tags,
+)
+from .validation import has_fit_parameter, _num_samples
+from ..preprocessing import StandardScaler
+from ..preprocessing import scale
 from ..datasets import (
     load_iris,
     make_blobs,
     make_multilabel_classification,
     make_regression,
 )
-from ..exceptions import DataConversionWarning, NotFittedError, SkipTestWarning
-from ..feature_selection import SelectFromModel, SelectKBest
-from ..linear_model import (
-    LinearRegression,
-    LogisticRegression,
-    RANSACRegressor,
-    Ridge,
-    SGDRegressor,
-)
-from ..metrics import accuracy_score, adjusted_rand_score, f1_score
-from ..metrics.pairwise import linear_kernel, pairwise_distances, rbf_kernel
-from ..model_selection import ShuffleSplit, train_test_split
-from ..model_selection._validation import _safe_split
-from ..pipeline import make_pipeline
-from ..preprocessing import StandardScaler, scale
-from ..random_projection import BaseRandomProjection
-from ..utils.fixes import threadpool_info
-from ..utils.validation import check_is_fitted
-from . import IS_PYPY, is_scalar_nan, shuffle
-from ._tags import _DEFAULT_TAGS, _safe_tags
-from ._testing import (
-    SkipTest,
-    _get_args,
-    assert_allclose,
-    assert_allclose_dense_sparse,
-    assert_array_almost_equal,
-    assert_array_equal,
-    assert_array_less,
-    assert_raise_message,
-    create_memmap_backed_data,
-    ignore_warnings,
-    raises,
-    set_random_state,
-)
-from .validation import _num_samples, has_fit_parameter

 REGRESSION_DATASET = None
 CROSS_DECOMPOSITION = ["PLSCanonical", "PLSRegression", "CCA", "PLSSVD"]
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index 8953b73515dec..b0074ae7e3a18 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -10,19 +10,18 @@
 #
 # License: BSD 3 clause

-import functools
 from functools import update_wrapper
+import functools

+import sklearn
 import numpy as np
 import scipy
 import scipy.stats
 import threadpoolctl
-
-import sklearn
-
 from .._config import config_context, get_config
 from ..externals._packaging.version import parse as parse_version

+
 np_version = parse_version(np.__version__)
 sp_version = parse_version(scipy.__version__)
@@ -36,12 +35,9 @@
 from ..externals._lobpcg import lobpcg  # type: ignore  # noqa

 try:
-    from scipy.optimize._linesearch import line_search_wolfe1, line_search_wolfe2
+    from scipy.optimize._linesearch import line_search_wolfe2, line_search_wolfe1
 except ImportError:  # SciPy < 1.8
-    from scipy.optimize.linesearch import (  # type: ignore  # noqa
-        line_search_wolfe1,
-        line_search_wolfe2,
-    )
+    from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1  # type: ignore  # noqa


 def _object_dtype_isnan(X):
diff --git a/sklearn/utils/graph.py b/sklearn/utils/graph.py
index f7ef0a051e3c3..78cdf8d31fdca 100644
--- a/sklearn/utils/graph.py
+++ b/sklearn/utils/graph.py
@@ -13,8 +13,8 @@
 import numpy as np
 from scipy import sparse

-from ..metrics.pairwise import pairwise_distances
 from .deprecation import deprecated
+from ..metrics.pairwise import pairwise_distances


 ###############################################################################
diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py
index 8b3aa3b5bcd4a..1cee8d1d42cf4 100644
--- a/sklearn/utils/metaestimators.py
+++ b/sklearn/utils/metaestimators.py
@@ -2,19 +2,20 @@
 # Author: Joel Nothman
 #         Andreas Mueller
 # License: BSD
+from typing import List, Any
+from types import MethodType
 import warnings
+from functools import wraps
+
 from abc import ABCMeta, abstractmethod
-from contextlib import suppress
-from functools import update_wrapper, wraps
 from operator import attrgetter
-from types import MethodType
-from typing import Any, List
-
+from functools import update_wrapper
 import numpy as np
+from contextlib import suppress

-from ..base import BaseEstimator
 from ..utils import _safe_indexing
 from ..utils._tags import _safe_tags
+from ..base import BaseEstimator

 __all__ = ["available_if", "if_delegate_has_method"]
diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index d6b1d43ef9e75..5311076e64eb8 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -6,14 +6,17 @@
 ==========================================
 """
-import warnings
 from collections.abc import Sequence
 from itertools import chain
+import warnings
+
+from scipy.sparse import issparse
+from scipy.sparse import dok_matrix
+from scipy.sparse import lil_matrix

 import numpy as np
-from scipy.sparse import dok_matrix, issparse, lil_matrix

-from .validation import _assert_all_finite, check_array
+from .validation import check_array, _assert_all_finite


 def _unique_multiclass(y):
diff --git a/sklearn/utils/murmurhash.pyx b/sklearn/utils/murmurhash.pyx
index 0e6a71e82002e..dc9c3da08906f 100644
--- a/sklearn/utils/murmurhash.pyx
+++ b/sklearn/utils/murmurhash.pyx
@@ -17,10 +17,8 @@ and can be found here:
 cimport cython
 cimport numpy as np
-
 import numpy as np

-
 cdef extern from "src/MurmurHash3.h":
     void MurmurHash3_x86_32(void *key, int len, np.uint32_t seed, void *out)
     void MurmurHash3_x86_128(void *key, int len, np.uint32_t seed, void *out)
diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py
index 68a1ae1dddb98..7e9b864afe043 100644
--- a/sklearn/utils/optimize.py
+++ b/sklearn/utils/optimize.py
@@ -13,12 +13,11 @@
 # Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour
 # License: BSD

-import warnings
-
 import numpy as np
+import warnings

-from ..exceptions import ConvergenceWarning
 from .fixes import line_search_wolfe1, line_search_wolfe2
+from ..exceptions import ConvergenceWarning


 class _LineSearchError(RuntimeError):
diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py
index 19e3a8c89addb..e3bdf2c6c7298 100644
--- a/sklearn/utils/random.py
+++ b/sklearn/utils/random.py
@@ -1,10 +1,9 @@
 # Author: Hamzeh Alsalhi
 #
 # License: BSD 3 clause
-import array
-
 import numpy as np
 import scipy.sparse as sp
+import array

 from . import check_random_state
 from ._random import sample_without_replacement
diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py
index 95f1bc36b2b1a..d53741c044c47 100644
--- a/sklearn/utils/sparsefuncs.py
+++ b/sklearn/utils/sparsefuncs.py
@@ -3,13 +3,15 @@
 #          Giorgio Patrini
 #
 # License: BSD 3 clause
-import numpy as np
 import scipy.sparse as sp
+import numpy as np

+from .sparsefuncs_fast import (
+    csr_mean_variance_axis0 as _csr_mean_var_axis0,
+    csc_mean_variance_axis0 as _csc_mean_var_axis0,
+    incr_mean_variance_axis0 as _incr_mean_var_axis0,
+)
 from ..utils.validation import _check_sample_weight
-from .sparsefuncs_fast import csc_mean_variance_axis0 as _csc_mean_var_axis0
-from .sparsefuncs_fast import csr_mean_variance_axis0 as _csr_mean_var_axis0
-from .sparsefuncs_fast import incr_mean_variance_axis0 as _incr_mean_var_axis0


 def _raise_typeerror(X):
diff --git a/sklearn/utils/sparsefuncs_fast.pyx b/sklearn/utils/sparsefuncs_fast.pyx
index eb5fe00e20928..ee12730d02b2d 100644
--- a/sklearn/utils/sparsefuncs_fast.pyx
+++ b/sklearn/utils/sparsefuncs_fast.pyx
@@ -8,11 +8,9 @@
 #!python

+from libc.math cimport fabs, sqrt, pow
 cimport numpy as np
-from libc.math cimport fabs, pow, sqrt
-
 import numpy as np
-
 cimport cython
 from cython cimport floating
 from numpy.math cimport isnan
diff --git a/sklearn/utils/tests/test_arrayfuncs.py b/sklearn/utils/tests/test_arrayfuncs.py
index b0a02e13d1639..5c43e480d395c 100644
--- a/sklearn/utils/tests/test_arrayfuncs.py
+++ b/sklearn/utils/tests/test_arrayfuncs.py
@@ -1,5 +1,5 @@
-import numpy as np
 import pytest
+import numpy as np

 from sklearn.utils._testing import assert_allclose
 from sklearn.utils.arrayfuncs import min_pos
diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py
index 9072f951eea7c..61f43c69050d1 100644
--- a/sklearn/utils/tests/test_class_weight.py
+++ b/sklearn/utils/tests/test_class_weight.py
@@ -6,8 +6,11 @@
 from sklearn.datasets import make_blobs
 from sklearn.linear_model import LogisticRegression
 from sklearn.tree import DecisionTreeClassifier
-from sklearn.utils._testing import assert_almost_equal, assert_array_almost_equal
-from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight
+
+from sklearn.utils.class_weight import compute_class_weight
+from sklearn.utils.class_weight import compute_sample_weight
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal


 def test_compute_class_weight():
diff --git a/sklearn/utils/tests/test_cython_blas.py b/sklearn/utils/tests/test_cython_blas.py
index e57bfc3ec5a9c..1b311f5160db5 100644
--- a/sklearn/utils/tests/test_cython_blas.py
+++ b/sklearn/utils/tests/test_cython_blas.py
@@ -1,24 +1,21 @@
-import numpy as np
 import pytest

-from sklearn.utils._cython_blas import (
-    ColMajor,
-    NoTrans,
-    RowMajor,
-    Trans,
-    _asum_memview,
-    _axpy_memview,
-    _copy_memview,
-    _dot_memview,
-    _gemm_memview,
-    _gemv_memview,
-    _ger_memview,
-    _nrm2_memview,
-    _rot_memview,
-    _rotg_memview,
-    _scal_memview,
-)
+import numpy as np
+
 from sklearn.utils._testing import assert_allclose
+from sklearn.utils._cython_blas import _dot_memview
+from sklearn.utils._cython_blas import _asum_memview
+from sklearn.utils._cython_blas import _axpy_memview
+from sklearn.utils._cython_blas import _nrm2_memview
+from sklearn.utils._cython_blas import _copy_memview
+from sklearn.utils._cython_blas import _scal_memview
+from sklearn.utils._cython_blas import _rotg_memview
+from sklearn.utils._cython_blas import _rot_memview
+from sklearn.utils._cython_blas import _gemv_memview
+from sklearn.utils._cython_blas import _ger_memview
+from sklearn.utils._cython_blas import _gemm_memview
+from sklearn.utils._cython_blas import RowMajor, ColMajor
+from sklearn.utils._cython_blas import Trans, NoTrans


 def _numpy_to_cython(dtype):
diff --git a/sklearn/utils/tests/test_cython_templating.py b/sklearn/utils/tests/test_cython_templating.py
index f5c9fa7a9087e..eeb8319e07415 100644
--- a/sklearn/utils/tests/test_cython_templating.py
+++ b/sklearn/utils/tests/test_cython_templating.py
@@ -1,7 +1,5 @@
 import pathlib
-
 import pytest
-
 import sklearn
diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py
index 88fe43ffb7b16..e39486cc0318a 100644
--- a/sklearn/utils/tests/test_deprecation.py
+++ b/sklearn/utils/tests/test_deprecation.py
@@ -4,10 +4,10 @@

 import pickle

+from sklearn.utils.deprecation import _is_deprecated
+from sklearn.utils.deprecation import deprecated
 import pytest

-from sklearn.utils.deprecation import _is_deprecated, deprecated
-

 @deprecated("qwerty")
 class MockClass1:
diff --git a/sklearn/utils/tests/test_encode.py b/sklearn/utils/tests/test_encode.py
index 9118eb56f0ba4..083db25b7ca80 100644
--- a/sklearn/utils/tests/test_encode.py
+++ b/sklearn/utils/tests/test_encode.py
@@ -4,7 +4,10 @@
 import pytest
 from numpy.testing import assert_array_equal

-from sklearn.utils._encode import _check_unknown, _encode, _get_counts, _unique
+from sklearn.utils._encode import _unique
+from sklearn.utils._encode import _encode
+from sklearn.utils._encode import _check_unknown
+from sklearn.utils._encode import _get_counts


 @pytest.mark.parametrize(
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index df1aa28bc4dd2..3a88b4431fe86 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -2,42 +2,43 @@
 # build_tools/azure/test_pytest_soft_dependency.sh on these
 # tests to make sure estimator_checks works without pytest.
-import sys
 import unittest
+import sys
 import warnings

 import numpy as np
 import scipy.sparse as sp
-
 import joblib
+
 from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.cluster import MiniBatchKMeans
 from sklearn.datasets import make_multilabel_classification
-from sklearn.decomposition import PCA
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.exceptions import SkipTestWarning
-from sklearn.linear_model import (
-    LinearRegression,
-    LogisticRegression,
-    MultiTaskElasticNet,
-    SGDClassifier,
-)
-from sklearn.mixture import GaussianMixture
-from sklearn.neighbors import KNeighborsRegressor
-from sklearn.svm import SVC, NuSVC
-from sklearn.utils import all_estimators, deprecated
+from sklearn.utils import deprecated
 from sklearn.utils._testing import (
+    raises,
+    ignore_warnings,
     MinimalClassifier,
     MinimalRegressor,
     MinimalTransformer,
     SkipTest,
-    ignore_warnings,
-    raises,
 )
+
+from sklearn.utils.validation import check_is_fitted, check_X_y
+from sklearn.ensemble import ExtraTreesClassifier
+from sklearn.linear_model import LinearRegression, SGDClassifier
+from sklearn.mixture import GaussianMixture
+from sklearn.cluster import MiniBatchKMeans
+from sklearn.decomposition import PCA
+from sklearn.linear_model import MultiTaskElasticNet, LogisticRegression
+from sklearn.svm import SVC, NuSVC
+from sklearn.neighbors import KNeighborsRegressor
+from sklearn.utils.validation import check_array
+from sklearn.utils import all_estimators
+from sklearn.exceptions import SkipTestWarning
+from sklearn.utils.metaestimators import available_if
+
 from sklearn.utils.estimator_checks import (
     _NotAnArray,
     _set_checking_parameters,
-    _yield_all_checks,
     check_class_weight_balanced_linear_classifier,
     check_classifier_data_not_an_array,
     check_classifiers_multilabel_output_format_decision_function,
@@ -47,18 +48,17 @@
     check_estimator,
     check_estimator_get_tags_default_keys,
     check_estimators_unfitted,
-    check_fit_check_is_fitted,
     check_fit_score_takes_y,
-    check_methods_sample_order_invariance,
-    check_methods_subset_invariance,
     check_no_attributes_set_in_init,
-    check_outlier_corruption,
     check_regressor_data_not_an_array,
     check_requires_y_none,
+    check_outlier_corruption,
     set_random_state,
+    check_fit_check_is_fitted,
+    check_methods_sample_order_invariance,
+    check_methods_subset_invariance,
+    _yield_all_checks,
 )
-from sklearn.utils.metaestimators import available_if
-from sklearn.utils.validation import check_array, check_is_fitted, check_X_y


 class CorrectNotFittedError(ValueError):
diff --git a/sklearn/utils/tests/test_estimator_html_repr.py b/sklearn/utils/tests/test_estimator_html_repr.py
index 696a9eff7a911..91644819864eb 100644
--- a/sklearn/utils/tests/test_estimator_html_repr.py
+++ b/sklearn/utils/tests/test_estimator_html_repr.py
@@ -1,31 +1,37 @@
-import html
 from contextlib import closing
+import html
 from io import StringIO

 import pytest

 from sklearn import config_context
-from sklearn.cluster import AgglomerativeClustering, Birch
+from sklearn.linear_model import LogisticRegression
+from sklearn.neural_network import MLPClassifier
+from sklearn.impute import SimpleImputer
+from sklearn.decomposition import PCA
+from sklearn.decomposition import TruncatedSVD
+from sklearn.pipeline import Pipeline
+from sklearn.pipeline import FeatureUnion
 from sklearn.compose import ColumnTransformer
-from sklearn.decomposition import PCA, TruncatedSVD
-from sklearn.ensemble import StackingClassifier, StackingRegressor, VotingClassifier
+from sklearn.ensemble import VotingClassifier
 from sklearn.feature_selection import SelectPercentile
+from sklearn.cluster import Birch
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import LinearSVC
+from sklearn.svm import LinearSVR
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.multiclass import OneVsOneClassifier
+from sklearn.ensemble import StackingClassifier
+from sklearn.ensemble import StackingRegressor
 from sklearn.gaussian_process.kernels import ExpSineSquared
-from sklearn.impute import SimpleImputer
 from sklearn.kernel_ridge import KernelRidge
-from sklearn.linear_model import LogisticRegression
+
 from sklearn.model_selection import RandomizedSearchCV
-from sklearn.multiclass import OneVsOneClassifier
-from sklearn.neural_network import MLPClassifier
-from sklearn.pipeline import FeatureUnion, Pipeline
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
-from sklearn.svm import LinearSVC, LinearSVR
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.utils._estimator_html_repr import (
-    _get_visual_block,
-    _write_label_html,
-    estimator_html_repr,
-)
+from sklearn.utils._estimator_html_repr import _write_label_html
+from sklearn.utils._estimator_html_repr import _get_visual_block
+from sklearn.utils._estimator_html_repr import estimator_html_repr


 @pytest.mark.parametrize("checked", [True, False])
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index 41df3f08d0574..ece7c180300a1 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -5,38 +5,35 @@
 # License: BSD 3 clause

 import numpy as np
-import pytest
-from scipy import linalg, sparse, stats
+from scipy import sparse
+from scipy import linalg
+from scipy import stats
 from scipy.sparse.linalg import eigsh
 from scipy.special import expit

-from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix
+import pytest
+
 from sklearn.utils import gen_batches
 from sklearn.utils._arpack import _init_arpack_v0
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_allclose_dense_sparse,
-    assert_almost_equal,
-    assert_array_almost_equal,
-    assert_array_equal,
-    skip_if_32bit,
-)
-from sklearn.utils.extmath import (
-    _deterministic_vector_sign_flip,
-    _incremental_mean_and_var,
-    _randomized_eigsh,
-    _safe_accumulator_op,
-    cartesian,
-    density,
-    log_logistic,
-    randomized_svd,
-    row_norms,
-    safe_sparse_dot,
-    softmax,
-    stable_cumsum,
-    svd_flip,
-    weighted_mode,
-)
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import assert_allclose_dense_sparse
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import skip_if_32bit
+
+from sklearn.utils.extmath import density, _safe_accumulator_op
+from sklearn.utils.extmath import randomized_svd, _randomized_eigsh
+from sklearn.utils.extmath import row_norms
+from sklearn.utils.extmath import weighted_mode
+from sklearn.utils.extmath import cartesian
+from sklearn.utils.extmath import log_logistic
+from sklearn.utils.extmath import svd_flip
+from sklearn.utils.extmath import _incremental_mean_and_var
+from sklearn.utils.extmath import _deterministic_vector_sign_flip
+from sklearn.utils.extmath import softmax
+from sklearn.utils.extmath import stable_cumsum
+from sklearn.utils.extmath import safe_sparse_dot
+from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix


 def test_density():
diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py
index 635cb0f7b9842..3566897da5efc 100644
--- a/sklearn/utils/tests/test_fixes.py
+++ b/sklearn/utils/tests/test_fixes.py
@@ -10,7 +10,9 @@
 import scipy.stats

 from sklearn.utils._testing import assert_array_equal
-from sklearn.utils.fixes import _object_dtype_isnan, loguniform
+
+from sklearn.utils.fixes import _object_dtype_isnan
+from sklearn.utils.fixes import loguniform


 @pytest.mark.parametrize("dtype, val", ([object, 1], [object, "a"], [float, 1]))
diff --git a/sklearn/utils/tests/test_graph.py b/sklearn/utils/tests/test_graph.py
index d64108a40d8ab..78196fbb05fba 100644
--- a/sklearn/utils/tests/test_graph.py
+++ b/sklearn/utils/tests/test_graph.py
@@ -1,10 +1,10 @@
-import numpy as np
 import pytest
+import numpy as np
 from scipy.sparse.csgraph import connected_components

-from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.neighbors import kneighbors_graph
 from sklearn.utils.graph import _fix_connected_components
+from sklearn.metrics.pairwise import pairwise_distances


 def test_fix_connected_components():
diff --git a/sklearn/utils/tests/test_metaestimators.py b/sklearn/utils/tests/test_metaestimators.py
index 26215f20c3a33..2a75ab387df60 100644
--- a/sklearn/utils/tests/test_metaestimators.py
+++ b/sklearn/utils/tests/test_metaestimators.py
@@ -1,10 +1,11 @@
-import pickle
-import warnings
-
 import numpy as np
 import pytest
+import warnings
+
+import pickle

-from sklearn.utils.metaestimators import available_if, if_delegate_has_method
+from sklearn.utils.metaestimators import if_delegate_has_method
+from sklearn.utils.metaestimators import available_if


 class Prefix:
diff --git a/sklearn/utils/tests/test_mocking.py b/sklearn/utils/tests/test_mocking.py
index 3c15384bf4757..a12c41256581a 100644
--- a/sklearn/utils/tests/test_mocking.py
+++ b/sklearn/utils/tests/test_mocking.py
@@ -1,13 +1,17 @@
 import numpy as np
 import pytest
-from numpy.testing import assert_allclose, assert_array_equal
 from scipy import sparse

+from numpy.testing import assert_array_equal
+from numpy.testing import assert_allclose
+
 from sklearn.datasets import load_iris
-from sklearn.utils import _safe_indexing, check_array
-from sklearn.utils._mocking import CheckingClassifier
+from sklearn.utils import check_array
+from sklearn.utils import _safe_indexing
 from sklearn.utils._testing import _convert_container

+from sklearn.utils._mocking import CheckingClassifier
+

 @pytest.fixture
 def iris():
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index a336e5ea3f412..996a25bc3a42b 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -1,35 +1,32 @@
-from itertools import product
-
 import numpy as np
-import pytest
 import scipy.sparse as sp
-from scipy.sparse import (
-    coo_matrix,
-    csc_matrix,
-    csr_matrix,
-    dok_matrix,
-    issparse,
-    lil_matrix,
-)
+from itertools import product
+import pytest

-from sklearn import datasets
-from sklearn.model_selection import ShuffleSplit
-from sklearn.svm import SVC
-from sklearn.utils._testing import (
-    assert_allclose,
-    assert_array_almost_equal,
-    assert_array_equal,
-)
+from scipy.sparse import issparse
+from scipy.sparse import csc_matrix
+from scipy.sparse import csr_matrix
+from scipy.sparse import coo_matrix
+from scipy.sparse import dok_matrix
+from scipy.sparse import 
lil_matrix + +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_allclose from sklearn.utils.estimator_checks import _NotAnArray + +from sklearn.utils.multiclass import unique_labels +from sklearn.utils.multiclass import is_multilabel +from sklearn.utils.multiclass import type_of_target +from sklearn.utils.multiclass import class_distribution +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.multiclass import _ovr_decision_function + from sklearn.utils.metaestimators import _safe_split -from sklearn.utils.multiclass import ( - _ovr_decision_function, - check_classification_targets, - class_distribution, - is_multilabel, - type_of_target, - unique_labels, -) +from sklearn.model_selection import ShuffleSplit +from sklearn.svm import SVC +from sklearn import datasets + EXAMPLES = { "multilabel-indicator": [ diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index 18730302124f9..4403c9a49275c 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -3,9 +3,9 @@ # License: BSD 3 clause import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal - from sklearn.utils.murmurhash import murmurhash3_32 +from numpy.testing import assert_array_almost_equal +from numpy.testing import assert_array_equal def test_mmhash3_int(): diff --git a/sklearn/utils/tests/test_optimize.py b/sklearn/utils/tests/test_optimize.py index a8bcd1aebf793..82719635366b0 100644 --- a/sklearn/utils/tests/test_optimize.py +++ b/sklearn/utils/tests/test_optimize.py @@ -1,8 +1,9 @@ import numpy as np + +from sklearn.utils.optimize import _newton_cg from scipy.optimize import fmin_ncg from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils.optimize import _newton_cg def test_newton_cg(): diff --git a/sklearn/utils/tests/test_parallel.py b/sklearn/utils/tests/test_parallel.py index 45c068170baa3..dfecd7b464168 100644 --- a/sklearn/utils/tests/test_parallel.py +++ b/sklearn/utils/tests/test_parallel.py @@ -1,7 +1,8 @@ import pytest +from joblib import Parallel + from numpy.testing import assert_array_equal -from joblib import Parallel from sklearn._config import config_context, get_config from sklearn.utils.fixes import delayed diff --git a/sklearn/utils/tests/test_pprint.py b/sklearn/utils/tests/test_pprint.py index c38f1b675d0d9..aa1e2e03841e9 100644 --- a/sklearn/utils/tests/test_pprint.py +++ b/sklearn/utils/tests/test_pprint.py @@ -3,12 +3,13 @@ import numpy as np -from sklearn import config_context -from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.feature_selection import SelectKBest, chi2 +from sklearn.utils._pprint import _EstimatorPrettyPrinter from sklearn.linear_model import LogisticRegressionCV from sklearn.pipeline import make_pipeline -from sklearn.utils._pprint import _EstimatorPrettyPrinter +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.feature_selection import SelectKBest, chi2 +from sklearn import config_context + # Ignore flake8 (lots of line too long issues) # flake8: noqa diff --git a/sklearn/utils/tests/test_random.py b/sklearn/utils/tests/test_random.py index 833d27edf05ca..320ebe8b1ae65 100644 --- a/sklearn/utils/tests/test_random.py +++ b/sklearn/utils/tests/test_random.py @@ -1,11 +1,11 @@ import numpy as np import pytest import scipy.sparse as sp -from numpy.testing import 
assert_array_almost_equal from scipy.special import comb +from numpy.testing import assert_array_almost_equal -from sklearn.utils._random import _our_rand_r_py from sklearn.utils.random import _random_choice_csc, sample_without_replacement +from sklearn.utils._random import _our_rand_r_py ############################################################################### diff --git a/sklearn/utils/tests/test_readonly_wrapper.py b/sklearn/utils/tests/test_readonly_wrapper.py index f7c0077ce3b87..38163cc2461ce 100644 --- a/sklearn/utils/tests/test_readonly_wrapper.py +++ b/sklearn/utils/tests/test_readonly_wrapper.py @@ -1,4 +1,5 @@ import numpy as np + import pytest from sklearn.utils._readonly_array_wrapper import ReadonlyArrayWrapper, _test_sum diff --git a/sklearn/utils/tests/test_seq_dataset.py b/sklearn/utils/tests/test_seq_dataset.py index 18f1be208b3f1..5c876fe62d74b 100644 --- a/sklearn/utils/tests/test_seq_dataset.py +++ b/sklearn/utils/tests/test_seq_dataset.py @@ -7,14 +7,14 @@ import pytest import scipy.sparse as sp from numpy.testing import assert_array_equal - -from sklearn.datasets import load_iris from sklearn.utils._seq_dataset import ( ArrayDataset32, ArrayDataset64, CSRDataset32, CSRDataset64, ) + +from sklearn.datasets import load_iris from sklearn.utils._testing import assert_allclose iris = load_iris() diff --git a/sklearn/utils/tests/test_shortest_path.py b/sklearn/utils/tests/test_shortest_path.py index 416187ff35fd3..4e2618d99d54a 100644 --- a/sklearn/utils/tests/test_shortest_path.py +++ b/sklearn/utils/tests/test_shortest_path.py @@ -3,7 +3,6 @@ import numpy as np import pytest from numpy.testing import assert_array_almost_equal - from sklearn.utils.graph import graph_shortest_path, single_source_shortest_path_length diff --git a/sklearn/utils/tests/test_show_versions.py b/sklearn/utils/tests/test_show_versions.py index bd166dfd8e522..e6590bfde15f5 100644 --- a/sklearn/utils/tests/test_show_versions.py +++ b/sklearn/utils/tests/test_show_versions.py @@ -1,6 +1,8 @@ -from sklearn.utils._show_versions import _get_deps_info, _get_sys_info, show_versions -from sklearn.utils._testing import ignore_warnings from sklearn.utils.fixes import threadpool_info +from sklearn.utils._show_versions import _get_sys_info +from sklearn.utils._show_versions import _get_deps_info +from sklearn.utils._show_versions import show_versions +from sklearn.utils._testing import ignore_warnings def test_get_sys_info(): diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index f3bcaf56bb561..6a86be2f0445f 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -1,29 +1,30 @@ -import numpy as np import pytest +import numpy as np import scipy.sparse as sp -from numpy.random import RandomState -from numpy.testing import assert_array_almost_equal, assert_array_equal + from scipy import linalg +from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.random import RandomState from sklearn.datasets import make_classification -from sklearn.utils._testing import assert_allclose from sklearn.utils.sparsefuncs import ( - count_nonzero, - csc_median_axis_0, + mean_variance_axis, incr_mean_variance_axis, inplace_column_scale, inplace_row_scale, - inplace_swap_column, inplace_swap_row, - mean_variance_axis, + inplace_swap_column, min_max_axis, + count_nonzero, + csc_median_axis_0, ) from sklearn.utils.sparsefuncs_fast import ( assign_rows_csr, - csr_row_norms, inplace_csr_row_normalize_l1, 
inplace_csr_row_normalize_l2, + csr_row_norms, ) +from sklearn.utils._testing import assert_allclose def test_mean_variance_axis0(): diff --git a/sklearn/utils/tests/test_tags.py b/sklearn/utils/tests/test_tags.py index b777b74921824..f96a4947164c3 100644 --- a/sklearn/utils/tests/test_tags.py +++ b/sklearn/utils/tests/test_tags.py @@ -1,7 +1,10 @@ import pytest from sklearn.base import BaseEstimator -from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags +from sklearn.utils._tags import ( + _DEFAULT_TAGS, + _safe_tags, +) class NoTagsEstimator: diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index de46d5a847023..fca7a07b14c19 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -1,35 +1,38 @@ -import atexit -import os -import sys -import unittest import warnings +import unittest +import sys +import os +import atexit import numpy as np -import pytest + from scipy import sparse -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.tree import DecisionTreeClassifier +import pytest + +from sklearn.utils.deprecation import deprecated +from sklearn.utils.metaestimators import available_if, if_delegate_has_method from sklearn.utils._readonly_array_wrapper import _test_sum from sklearn.utils._testing import ( - TempMemmap, - _convert_container, - _delete_folder, - assert_allclose, - assert_allclose_dense_sparse, - assert_no_warnings, - assert_raise_message, assert_raises, - assert_raises_regex, assert_warns, + assert_no_warnings, + set_random_state, + assert_raise_message, + ignore_warnings, check_docstring_parameters, + assert_allclose_dense_sparse, + assert_raises_regex, + TempMemmap, create_memmap_backed_data, - ignore_warnings, + _delete_folder, + _convert_container, raises, - set_random_state, + assert_allclose, ) -from sklearn.utils.deprecation import deprecated -from sklearn.utils.metaestimators import available_if, if_delegate_has_method + +from sklearn.tree import DecisionTreeClassifier +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis def test_set_random_state(): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 21e1247481302..82be82afa5eed 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -1,40 +1,37 @@ -import string -import timeit -import warnings from copy import copy from itertools import chain +import warnings +import string +import timeit -import numpy as np import pytest +import numpy as np import scipy.sparse as sp -from sklearn import config_context -from sklearn.utils import ( - _approximate_mode, - _determine_key_type, - _get_column_indices, - _message_with_time, - _print_elapsed_time, - _safe_indexing, - _to_object_array, - check_random_state, - column_or_1d, - deprecated, - gen_batches, - gen_even_slices, - get_chunk_n_rows, - is_scalar_nan, - resample, - safe_mask, - shuffle, -) -from sklearn.utils._mocking import MockDataFrame from sklearn.utils._testing import ( - _convert_container, - assert_allclose_dense_sparse, assert_array_equal, + assert_allclose_dense_sparse, assert_no_warnings, + _convert_container, ) +from sklearn.utils import check_random_state +from sklearn.utils import _determine_key_type +from sklearn.utils import deprecated +from sklearn.utils import gen_batches +from sklearn.utils import _get_column_indices +from sklearn.utils import resample +from sklearn.utils import safe_mask +from sklearn.utils import column_or_1d +from sklearn.utils import 
_safe_indexing +from sklearn.utils import shuffle +from sklearn.utils import gen_even_slices +from sklearn.utils import _message_with_time, _print_elapsed_time +from sklearn.utils import get_chunk_n_rows +from sklearn.utils import is_scalar_nan +from sklearn.utils import _to_object_array +from sklearn.utils import _approximate_mode +from sklearn.utils._mocking import MockDataFrame +from sklearn import config_context # toy array X_toy = np.arange(9).reshape((3, 3)) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 9672bf4f4dd01..e33d14fa3b07e 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -1,67 +1,64 @@ """Tests for input validation functions""" import numbers -import re import warnings +import re + +from tempfile import NamedTemporaryFile from itertools import product from operator import itemgetter -from tempfile import NamedTemporaryFile -import numpy as np import pytest -import scipy.sparse as sp from pytest import importorskip +import numpy as np +import scipy.sparse as sp -import sklearn -from sklearn.base import BaseEstimator -from sklearn.datasets import make_blobs -from sklearn.ensemble import RandomForestRegressor -from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning +from sklearn.utils._testing import assert_no_warnings +from sklearn.utils._testing import ignore_warnings +from sklearn.utils._testing import SkipTest +from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_allclose_dense_sparse +from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import _convert_container +from sklearn.utils import as_float_array, check_array, check_symmetric +from sklearn.utils import check_X_y +from sklearn.utils import deprecated +from sklearn.utils._mocking import MockDataFrame +from sklearn.utils.fixes import parse_version +from sklearn.utils.estimator_checks import _NotAnArray +from sklearn.random_projection import _sparse_random_matrix from sklearn.linear_model import ARDRegression from sklearn.neighbors import KNeighborsClassifier -from sklearn.random_projection import _sparse_random_matrix +from sklearn.ensemble import RandomForestRegressor from sklearn.svm import SVR -from sklearn.utils import ( - _safe_indexing, - as_float_array, - check_array, - check_symmetric, - check_X_y, - deprecated, -) -from sklearn.utils._mocking import MockDataFrame -from sklearn.utils._testing import ( - SkipTest, - TempMemmap, - _convert_container, - assert_allclose, - assert_allclose_dense_sparse, - assert_array_equal, - assert_no_warnings, - ignore_warnings, -) -from sklearn.utils.estimator_checks import _NotAnArray -from sklearn.utils.fixes import parse_version +from sklearn.datasets import make_blobs +from sklearn.utils import _safe_indexing from sklearn.utils.validation import ( - FLOAT_DTYPES, - _allclose_dense_sparse, - _check_feature_names_in, - _check_fit_params, - _check_psd_eigenvalues, - _check_sample_weight, - _check_y, - _deprecate_positional_args, - _get_feature_names, - _num_features, - _num_samples, - assert_all_finite, - check_consistent_length, + has_fit_parameter, check_is_fitted, + check_consistent_length, + assert_all_finite, check_memory, check_non_negative, + _num_samples, check_scalar, - has_fit_parameter, + _check_psd_eigenvalues, + _check_y, + _deprecate_positional_args, + _check_sample_weight, + _allclose_dense_sparse, + _num_features, + FLOAT_DTYPES, + _get_feature_names, + 
_check_feature_names_in, + _check_fit_params, ) +from sklearn.base import BaseEstimator +import sklearn + +from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning + +from sklearn.utils._testing import TempMemmap # TODO: Remove np.matrix usage in 1.2 diff --git a/sklearn/utils/tests/test_weight_vector.py b/sklearn/utils/tests/test_weight_vector.py index e9c9c286af62e..627d46d1fda06 100644 --- a/sklearn/utils/tests/test_weight_vector.py +++ b/sklearn/utils/tests/test_weight_vector.py @@ -1,7 +1,9 @@ import numpy as np import pytest - -from sklearn.utils._weight_vector import WeightVector32, WeightVector64 +from sklearn.utils._weight_vector import ( + WeightVector32, + WeightVector64, +) @pytest.mark.parametrize( diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index f5383905941e8..879ddac3e6f42 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -9,24 +9,26 @@ # Sylvain Marie # License: BSD 3 clause +from functools import wraps +import warnings import numbers import operator -import warnings -from contextlib import suppress -from functools import wraps -from inspect import Parameter, isclass, signature import numpy as np import scipy.sparse as sp +from inspect import signature, isclass, Parameter # mypy error: Module 'numpy.core.numeric' has no attribute 'ComplexWarning' from numpy.core.numeric import ComplexWarning # type: ignore - import joblib -from .. import get_config as _get_config -from ..exceptions import DataConversionWarning, NotFittedError, PositiveSpectrumWarning +from contextlib import suppress + from .fixes import _object_dtype_isnan +from .. import get_config as _get_config +from ..exceptions import PositiveSpectrumWarning +from ..exceptions import NotFittedError +from ..exceptions import DataConversionWarning FLOAT_DTYPES = (np.float64, np.float32, np.float16) @@ -580,9 +582,9 @@ def _pandas_dtype_needs_early_conversion(pd_dtype): # Check these early for pandas versions without extension dtypes from pandas.api.types import ( is_bool_dtype, + is_sparse, is_float_dtype, is_integer_dtype, - is_sparse, ) if is_bool_dtype(pd_dtype): From e59754b90f4ae3b3e418d5cd6b8e15cf8b6f5d46 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:36:52 +0200 Subject: [PATCH 08/10] intentionally bad import order --- sklearn/model_selection/_split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index d2a0b5e1fc329..b6692badf678c 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -20,8 +20,8 @@ from abc import ABCMeta, abstractmethod from inspect import signature -import numpy as np from scipy.special import comb +import numpy as np from ..utils import indexable, check_random_state, _safe_indexing from ..utils import _approximate_mode From d69bc5d8ddddba83306e1d0be0003f42207a0ac0 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:49:11 +0200 Subject: [PATCH 09/10] fix isort in azure pipeline --- azure-pipelines.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 974d07052c22e..6a694bf1ba9d5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -34,14 +34,14 @@ jobs: versionSpec: '3.9' - bash: | # Include pytest compatibility with mypy - pip install pytest flake8 mypy==0.782 black==22.3.0 + pip install pytest flake8 mypy==0.782 black==22.3.0 isort displayName: Install linters - bash: | black --check 
--diff . displayName: Run black - bash: | - black --check --diff . - displayName: Run black + isort --check --diff . + displayName: Run isort - bash: | ./build_tools/circle/linting.sh displayName: Run linting From f0101bd780ebfa7eec6bcdd757d850820f566002 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Tue, 17 May 2022 11:49:53 +0200 Subject: [PATCH 10/10] still making sure it fails --- sklearn/model_selection/_split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index b6692badf678c..d08eeba021537 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -18,10 +18,10 @@ from math import ceil, floor import numbers from abc import ABCMeta, abstractmethod -from inspect import signature from scipy.special import comb import numpy as np +from inspect import signature from ..utils import indexable, check_random_state, _safe_indexing from ..utils import _approximate_mode