diff --git a/README.rst b/README.rst index 089929278c75d..7d4e4ed9568ba 100644 --- a/README.rst +++ b/README.rst @@ -42,6 +42,7 @@ .. |PandasMinVersion| replace:: 1.0.5 .. |SeabornMinVersion| replace:: 0.9.0 .. |PytestMinVersion| replace:: 5.0.1 +.. |PlotlyMinVersion| replace:: 5.9.0 .. image:: https://raw.githubusercontent.com/scikit-learn/scikit-learn/main/doc/logos/scikit-learn-logo.png :target: https://scikit-learn.org/ @@ -83,7 +84,7 @@ classes end with "Display") require Matplotlib (>= |MatplotlibMinVersion|). For running the examples Matplotlib >= |MatplotlibMinVersion| is required. A few examples require scikit-image >= |Scikit-ImageMinVersion|, a few examples require pandas >= |PandasMinVersion|, some examples require seaborn >= -|SeabornMinVersion|. +|SeabornMinVersion| and plotly >= |PlotlyMinVersion|. User installation ~~~~~~~~~~~~~~~~~ diff --git a/build_tools/github/doc_environment.yml b/build_tools/github/doc_environment.yml index f0a933e31552a..0e332269b68ca 100644 --- a/build_tools/github/doc_environment.yml +++ b/build_tools/github/doc_environment.yml @@ -25,6 +25,7 @@ dependencies: - sphinx-gallery - numpydoc - sphinx-prompt + - plotly - pip - pip: - sphinxext-opengraph diff --git a/build_tools/github/doc_linux-64_conda.lock b/build_tools/github/doc_linux-64_conda.lock index 76e37065c5ddf..9ec0dfc306540 100644 --- a/build_tools/github/doc_linux-64_conda.lock +++ b/build_tools/github/doc_linux-64_conda.lock @@ -1,6 +1,6 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: eb6f446eaacd58fbff84b6520503d14519011e67e0f6ce3d39a9a2922fb781cb +# input_hash: 51824060cd657aa08f97c645d9c3c1efd6eec66aedaf8949d8d7cb5d9d8e69e6 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.6.15-ha878542_0.tar.bz2#c320890f77fd1d617fa876e0982002c2 @@ -83,7 +83,7 @@ https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.10-h9b69904_4.tar.b https://conda.anaconda.org/conda-forge/linux-64/libllvm14-14.0.6-he0ac6c6_0.tar.bz2#f5759f0c80708fbf9c4836c0cb46d0fe https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.47.0-hdcd2b5c_1.tar.bz2#6fe9e31c2b8d0b022626ccac13e6ca3c https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.37-h753d276_4.tar.bz2#6b611734b73d639c084ac4be2fcd996a -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.39.2-h753d276_1.tar.bz2#90136dc0a305db4e1df24945d431457b +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.39.3-h753d276_0.tar.bz2#ccb2457c73609f2622b8a4b3e42e5d8b https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.10.0-haa6b8db_3.tar.bz2#89acee135f0809a18a1f4537390aa2dd https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 @@ -112,7 +112,7 @@ https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_openb https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h0e0dad5_3.tar.bz2#5627d42c13a9b117ae1701c6e195624f https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.0.3-he3ba5ed_0.tar.bz2#f9dbabc7e01c459ed7a1d1d64b206e9b https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.30-h28c427c_1.tar.bz2#0bd292db365c83624316efc2764d9f16 
-https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.2-h4ff8645_1.tar.bz2#2676ec698ce91567fca50654ac1b18ba +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.3-h4ff8645_0.tar.bz2#f03cf4ec974e32b6c5d349f62637e36e https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-h166bdaf_0.tar.bz2#384e7fcb3cd162ba3e4aed4b687df566 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h166bdaf_0.tar.bz2#637054603bb7594302e3bf83f0a99879 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-h166bdaf_0.tar.bz2#732e22f1741bccea861f5668cf7342a7 @@ -175,10 +175,12 @@ https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.0-pyhd8 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.0.1-pyhd8ed1ab_0.tar.bz2#8b29b2c12cb21dbd057755e5fd22d005 https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.3.0-pyha770c72_0.tar.bz2#a9d85960bc62d53cc4ea0d1d27f73c98 https://conda.anaconda.org/conda-forge/noarch/wheel-0.37.1-pyhd8ed1ab_0.tar.bz2#1ca02aaf78d9c70d9a81a3bed5752022 https://conda.anaconda.org/conda-forge/noarch/zipp-3.8.1-pyhd8ed1ab_0.tar.bz2#a3508a0c850745b875de88aea4c40cc5 
https://conda.anaconda.org/conda-forge/noarch/babel-2.10.3-pyhd8ed1ab_0.tar.bz2#72f1c6d03109d7a70087bc1d029a8eda @@ -198,6 +200,7 @@ https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz https://conda.anaconda.org/conda-forge/noarch/partd-1.3.0-pyhd8ed1ab_0.tar.bz2#af8c82d121e63082926062d61d9abb54 https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py39hd5dbb17_2.tar.bz2#3b74a959f6a8008f5901de60b3572c09 https://conda.anaconda.org/conda-forge/noarch/pip-22.2.2-pyhd8ed1ab_0.tar.bz2#0b43abe4d3ee93e82742d37def53a836 +https://conda.anaconda.org/conda-forge/noarch/plotly-5.10.0-pyhd8ed1ab_0.tar.bz2#e95502aa0f8e3db05d198214472575de https://conda.anaconda.org/conda-forge/linux-64/pluggy-1.0.0-py39hf3d152e_3.tar.bz2#c375c89340e563053f3656c7f134d265 https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.2-py39hb9d737c_0.tar.bz2#1e7ffe59e21862559e06b981817e5058 https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-14.0-h7f54b18_8.tar.bz2#f9dbcfbb942ec9a3c0249cb71da5c7d1 @@ -231,14 +234,14 @@ https://conda.anaconda.org/conda-forge/noarch/tifffile-2022.8.12-pyhd8ed1ab_0.ta https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py39h18e9c17_0.tar.bz2#5ed8f83afff3b64fa91f7a6af8d7ff04 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.19.3-py39h1832856_1.tar.bz2#472bb5b9d9eb26ac697902da265ee551 -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.11.2-pyhd8ed1ab_0.tar.bz2#fe2303dc8f1febeb82d927ce8ad153ed +https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.0-pyhd8ed1ab_0.tar.bz2#05ee2fb22c1eca4309c06d11aff049f3 https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.13.2-py39hd257fcd_0.tar.bz2#bd7cdadf70e34a19333c3aacc40206e8 https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 
https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.5.3-py39hf3d152e_2.tar.bz2#98bf9bdfbac2ac73bbd1dc12a61519eb https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_1.tar.bz2#089382ee0e2dc2eae33a04cc3c2bddb0 -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.11.2-hd8ed1ab_0.tar.bz2#e56b6a19f4b717eca7c68ad78196b075 +https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.0-hd8ed1ab_0.tar.bz2#c22474d96fa1725ae47def82b5668686 https://conda.anaconda.org/conda-forge/noarch/sphinx-5.1.1-pyhd8ed1ab_1.tar.bz2#cd1129e88f6278787212624e1b7a8001 https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.4.0-pyhd8ed1ab_1.tar.bz2#0aac89c61a466b0f9c4fd0ec44d81f1d https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.11.1-pyhd8ed1ab_0.tar.bz2#729254314a5d178eefca50acbc2687b8 https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 -# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/58/ed/59df64b8400caf736f38bd3725ab9b1d9e50874f629980973aea090c1a8b/sphinxext_opengraph-0.6.3-py3-none-any.whl#sha256=bf76017c105856b07edea6caf4942b6ae9bb168585dccfd6dbdb6e4161f6b03a +# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/58/ed/59df64b8400caf736f38bd3725ab9b1d9e50874f629980973aea090c1a8b/sphinxext_opengraph-0.6.3-py3-none-any.whl#sha256=bf76017c105856b07edea6caf4942b6ae9bb168585dccfd6dbdb6e4161f6b03a diff --git a/build_tools/github/doc_min_dependencies_environment.yml b/build_tools/github/doc_min_dependencies_environment.yml index 9f20b9e900336..d1c5c4f2053e2 100644 --- a/build_tools/github/doc_min_dependencies_environment.yml +++ b/build_tools/github/doc_min_dependencies_environment.yml @@ -25,6 +25,7 @@ dependencies: - sphinx-gallery=0.7.0 # min - numpydoc=1.2.0 # min - sphinx-prompt=1.3.0 # min + - plotly=5.9.0 # min - pip - pip: - sphinxext-opengraph==0.4.2 # min diff --git a/build_tools/github/doc_min_dependencies_linux-64_conda.lock b/build_tools/github/doc_min_dependencies_linux-64_conda.lock index 
f62c790e7abde..dac55e8cec09d 100644 --- a/build_tools/github/doc_min_dependencies_linux-64_conda.lock +++ b/build_tools/github/doc_min_dependencies_linux-64_conda.lock @@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: d7c5b73a8e5033accbbf23734674ee617f6e77fbc98fe96e654c88b245458829 +# input_hash: 53d568af487a4ac9a260bfa1bcd5263594b6a4aef53e17e7ccf17e2e2165ab96 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.6.15-ha878542_0.tar.bz2#c320890f77fd1d617fa876e0982002c2 @@ -45,7 +45,7 @@ https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-7.5.0-h56 https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-7.5.0-hd0bb8aa_20.tar.bz2#dbe78fc5fb9c339f8e55426559e12f7b https://conda.anaconda.org/conda-forge/linux-64/libllvm9-9.0.1-default_hc23dcda_7.tar.bz2#9f4686a2c319355fe8636ca13783c3b4 https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.37-h753d276_4.tar.bz2#6b611734b73d639c084ac4be2fcd996a -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.39.2-h753d276_1.tar.bz2#90136dc0a305db4e1df24945d431457b +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.39.3-h753d276_0.tar.bz2#ccb2457c73609f2622b8a4b3e42e5d8b https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-14.0.4-he0ac6c6_0.tar.bz2#cecc6e3cb66570ffcfb820c637890f54 https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa @@ -61,7 +61,7 @@ https://conda.anaconda.org/conda-forge/linux-64/libglib-2.66.3-hbe7bbb4_0.tar.bz https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h0e0dad5_3.tar.bz2#5627d42c13a9b117ae1701c6e195624f 
https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.10-hee79883_0.tar.bz2#0217b0926808b1adf93247bba489d733 https://conda.anaconda.org/conda-forge/linux-64/mkl-2020.4-h726a3e6_304.tar.bz2#b9b35a50e5377b19da6ec0709ae77fc3 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.2-h4ff8645_1.tar.bz2#2676ec698ce91567fca50654ac1b18ba +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.39.3-h4ff8645_0.tar.bz2#f03cf4ec974e32b6c5d349f62637e36e https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.1.1-hc9558a2_0.tar.bz2#1eb7c67eb11eab0c98a87f84174fdde1 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.0-h8e229c2_0.tar.bz2#f314f79031fec74adc9bff50fbaffd89 https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.1.1-he991be0_0.tar.bz2#e38ac82cc517b9e245c1ae99f9f140da @@ -102,9 +102,11 @@ https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.0-pyhd8 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.0.1-pyhd8ed1ab_0.tar.bz2#8b29b2c12cb21dbd057755e5fd22d005 https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.3.0-pyha770c72_0.tar.bz2#a9d85960bc62d53cc4ea0d1d27f73c98 https://conda.anaconda.org/conda-forge/noarch/wheel-0.37.1-pyhd8ed1ab_0.tar.bz2#1ca02aaf78d9c70d9a81a3bed5752022 
https://conda.anaconda.org/conda-forge/noarch/babel-2.10.3-pyhd8ed1ab_0.tar.bz2#72f1c6d03109d7a70087bc1d029a8eda https://conda.anaconda.org/conda-forge/linux-64/cffi-1.14.4-py38ha312104_0.tar.bz2#8f82b87522fbb1d4b24e8b5e2b1d0501 @@ -120,6 +122,7 @@ https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.3-py38h95a1406_0.tar. https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 https://conda.anaconda.org/conda-forge/noarch/partd-1.3.0-pyhd8ed1ab_0.tar.bz2#af8c82d121e63082926062d61d9abb54 https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py38ha3b2c9c_2.tar.bz2#a077cc2bb9d854074b1cf4607252da7a +https://conda.anaconda.org/conda-forge/noarch/plotly-5.9.0-pyhd8ed1ab_0.tar.bz2#00a668931d448ce0ce42d1b02005d636 https://conda.anaconda.org/conda-forge/linux-64/pluggy-1.0.0-py38h578d9bd_3.tar.bz2#6ce4ce3d4490a56eb33b52c179609193 https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.2-py38h0a891b7_0.tar.bz2#907a39b6d7443f770ed755885694f864 https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 @@ -148,16 +151,16 @@ https://conda.anaconda.org/conda-forge/noarch/pyopenssl-22.0.0-pyhd8ed1ab_0.tar. 
https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_0.tar.bz2#95286e05a617de9ebfe3246cecbfb72f https://conda.anaconda.org/conda-forge/linux-64/qt-5.12.5-hd8c4c69_1.tar.bz2#0e105d4afe0c3c81c4fbd9937ec4f359 https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.16.2-py38hb3f55d8_0.tar.bz2#468b398fefac8884cd6e6513af66549b -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.11.2-pyhd8ed1ab_0.tar.bz2#fe2303dc8f1febeb82d927ce8ad153ed +https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.0-pyhd8ed1ab_0.tar.bz2#05ee2fb22c1eca4309c06d11aff049f3 https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38ha8c2ead_3.tar.bz2#242c206b0c30fdc4c18aea16f04c4262 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.12.2-py38h5c078b8_0.tar.bz2#33787719ad03d33cffc4e2e3ea82bc9e https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.1.2-py38_1.tar.bz2#c2b9671a19c01716c37fe0a0e18b0aec https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_1.tar.bz2#089382ee0e2dc2eae33a04cc3c2bddb0 -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.11.2-hd8ed1ab_0.tar.bz2#e56b6a19f4b717eca7c68ad78196b075 +https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.0-hd8ed1ab_0.tar.bz2#c22474d96fa1725ae47def82b5668686 https://conda.anaconda.org/conda-forge/noarch/sphinx-4.0.1-pyh6c4a22f_2.tar.bz2#c203dcc46f262853ecbb9552c50d664e https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.7.0-py_0.tar.bz2#80bad3f857ecc86a4ab73f3e57addd13 https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc -# pip 
sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357 +# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357 diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index e6368be7f9d05..d7be927f26ace 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -242,6 +242,7 @@ def remove_from(alist, to_remove): "sphinx-gallery", "numpydoc", "sphinx-prompt", + "plotly", ], "pip_dependencies": ["sphinxext-opengraph"], "package_constraints": { @@ -257,6 +258,7 @@ def remove_from(alist, to_remove): "numpydoc": "min", "sphinx-prompt": "min", "sphinxext-opengraph": "min", + "plotly": "min", }, }, { @@ -274,6 +276,7 @@ def remove_from(alist, to_remove): "sphinx-gallery", "numpydoc", "sphinx-prompt", + "plotly", ], "pip_dependencies": ["sphinxext-opengraph"], "package_constraints": { diff --git a/doc/conf.py b/doc/conf.py index 3676bfbdf2161..9ec60d4b34092 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -29,6 +29,17 @@ import sphinx_gallery from sphinx_gallery.sorting import ExampleTitleSortKey +try: + # Configure plotly to integrate its output into the HTML pages generated by + # sphinx-gallery. + import plotly.io as pio + + pio.renderers.default = "sphinx_gallery" +except ImportError: + # Make it possible to render the doc when not running the examples + # that need plotly. + pass + # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. 
They can be @@ -257,6 +268,9 @@ "auto_examples/linear_model/plot_bayesian_ridge": ( "auto_examples/linear_model/plot_ard" ), + "auto_examples/model_selection/grid_search_text_feature_extraction": ( + "auto_examples/model_selection/plot_grid_search_text_feature_extraction" + ), } html_context["redirects"] = redirects for old_link in redirects: diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index c797eb288c6e5..4a61b5ec5f118 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -163,7 +163,7 @@ You can also provide custom feature names for the input data using .. topic:: Examples: * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection_pipeline.py` - * :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py` + * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` * :ref:`sphx_glr_auto_examples_compose_plot_digits_pipe.py` * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_approximation.py` * :ref:`sphx_glr_auto_examples_svm_plot_svm_anova.py` diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 0a2256bfc8505..72bad0bf8ef87 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -295,7 +295,7 @@ section. * :ref:`sphx_glr_auto_examples_model_selection_plot_roc_crossval.py`, * :ref:`sphx_glr_auto_examples_feature_selection_plot_rfe_with_cross_validation.py`, * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py`, - * :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py`, + * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py`, * :ref:`sphx_glr_auto_examples_model_selection_plot_cv_predict.py`, * :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`. 
diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 962784fb236bc..5876000f9a1c1 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -607,7 +607,7 @@ As usual the best way to adjust the feature extraction parameters is to use a cross-validated grid search, for instance by pipelining the feature extractor with a classifier: - * :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py` + * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` Decoding text files diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index ad3f24f23980a..e4cc62b7773f3 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -77,7 +77,7 @@ evaluated and the best combination is retained. - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py` for an example of Grid Search computation on the digits dataset. - - See :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py` for an example + - See :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` for an example of Grid Search coupling parameters from a text documents feature extractor (n-gram count vectorizer and TF-IDF transformer) with a classifier (here a linear SVM trained with SGD with either elastic diff --git a/examples/model_selection/grid_search_text_feature_extraction.py b/examples/model_selection/grid_search_text_feature_extraction.py deleted file mode 100644 index 91801b361265b..0000000000000 --- a/examples/model_selection/grid_search_text_feature_extraction.py +++ /dev/null @@ -1,129 +0,0 @@ -""" -========================================================== -Sample pipeline for text feature extraction and evaluation -========================================================== - -The dataset used in this example is the 20 newsgroups dataset which will be 
-automatically downloaded and then cached and reused for the document -classification example. - -You can adjust the number of categories by giving their names to the dataset -loader or setting them to None to get the 20 of them. - -Here is a sample output of a run on a quad-core machine:: - - Loading 20 newsgroups dataset for categories: - ['alt.atheism', 'talk.religion.misc'] - 1427 documents - 2 categories - - Performing grid search... - pipeline: ['vect', 'tfidf', 'clf'] - parameters: - {'clf__alpha': (1.0000000000000001e-05, 9.9999999999999995e-07), - 'clf__max_iter': (10, 50, 80), - 'clf__penalty': ('l2', 'elasticnet'), - 'tfidf__use_idf': (True, False), - 'vect__max_n': (1, 2), - 'vect__max_df': (0.5, 0.75, 1.0), - 'vect__max_features': (None, 5000, 10000, 50000)} - done in 1737.030s - - Best score: 0.940 - Best parameters set: - clf__alpha: 9.9999999999999995e-07 - clf__max_iter: 50 - clf__penalty: 'elasticnet' - tfidf__use_idf: True - vect__max_n: 2 - vect__max_df: 0.75 - vect__max_features: 50000 - -""" - -# Author: Olivier Grisel -# Peter Prettenhofer -# Mathieu Blondel -# License: BSD 3 clause - -# %% -# Data loading -# ------------ - -from pprint import pprint -from time import time -import logging - -from sklearn.datasets import fetch_20newsgroups -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_extraction.text import TfidfTransformer -from sklearn.linear_model import SGDClassifier -from sklearn.model_selection import GridSearchCV -from sklearn.pipeline import Pipeline - -# Display progress logs on stdout -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") - -# Load some categories from the training set -categories = [ - "alt.atheism", - "talk.religion.misc", -] - -# Uncomment the following to do the analysis on all the categories -# categories = None - -print("Loading 20 newsgroups dataset for categories:") -print(categories) - -data = fetch_20newsgroups(subset="train", 
categories=categories) -print("%d documents" % len(data.filenames)) -print("%d categories" % len(data.target_names)) -print() - -# %% -# Pipeline with hyperparameter tuning -# ----------------------------------- - -# Define a pipeline combining a text feature extractor with a simple classifier -pipeline = Pipeline( - [ - ("vect", CountVectorizer()), - ("tfidf", TfidfTransformer()), - ("clf", SGDClassifier()), - ] -) - -# Parameters to use for grid search. Uncommenting more parameters will give -# better exploring power but will increase processing time in a combinatorial -# way -parameters = { - "vect__max_df": (0.5, 0.75, 1.0), - # 'vect__max_features': (None, 5000, 10000, 50000), - "vect__ngram_range": ((1, 1), (1, 2)), # unigrams or bigrams - # 'tfidf__use_idf': (True, False), - # 'tfidf__norm': ('l1', 'l2'), - "clf__max_iter": (20,), - "clf__alpha": (0.00001, 0.000001), - "clf__penalty": ("l2", "elasticnet"), - # 'clf__max_iter': (10, 50, 80), -} - -# Find the best parameters for both the feature extraction and the -# classifier -grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1) - -print("Performing grid search...") -print("pipeline:", [name for name, _ in pipeline.steps]) -print("parameters:") -pprint(parameters) -t0 = time() -grid_search.fit(data.data, data.target) -print("done in %0.3fs" % (time() - t0)) -print() - -print("Best score: %0.3f" % grid_search.best_score_) -print("Best parameters set:") -best_parameters = grid_search.best_estimator_.get_params() -for param_name in sorted(parameters.keys()): - print("\t%s: %r" % (param_name, best_parameters[param_name])) diff --git a/examples/model_selection/plot_grid_search_text_feature_extraction.py b/examples/model_selection/plot_grid_search_text_feature_extraction.py new file mode 100644 index 0000000000000..9ad4296aad9b4 --- /dev/null +++ b/examples/model_selection/plot_grid_search_text_feature_extraction.py @@ -0,0 +1,265 @@ +""" 
+========================================================== +Sample pipeline for text feature extraction and evaluation +========================================================== + +The dataset used in this example is :ref:`20newsgroups_dataset` which will be +automatically downloaded, cached and reused for the document classification +example. + +In this example, we tune the hyperparameters of a particular classifier using a +:class:`~sklearn.model_selection.RandomizedSearchCV`. For a demo on the +performance of some other classifiers, see the +:ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` +notebook. +""" + +# Author: Olivier Grisel +# Peter Prettenhofer +# Mathieu Blondel +# Arturo Amor +# License: BSD 3 clause + +# %% +# Data loading +# ------------ +# We load two categories from the training set. You can adjust the number of +# categories by adding their names to the list or setting `categories=None` when +# calling the dataset loader :func:`~sklearn.datasets.fetch_20newsgroups` to get +# the 20 of them. + +from sklearn.datasets import fetch_20newsgroups + +categories = [ + "alt.atheism", + "talk.religion.misc", +] + +data_train = fetch_20newsgroups( + subset="train", + categories=categories, + shuffle=True, + random_state=42, + remove=("headers", "footers", "quotes"), +) + +data_test = fetch_20newsgroups( + subset="test", + categories=categories, + shuffle=True, + random_state=42, + remove=("headers", "footers", "quotes"), +) + +print(f"Loading 20 newsgroups dataset for {len(data_train.target_names)} categories:") +print(data_train.target_names) +print(f"{len(data_train.data)} documents") + +# %% +# Pipeline with hyperparameter tuning +# ----------------------------------- +# +# We define a pipeline combining a text feature vectorizer with a simple yet +# effective classifier for text classification. 
+ +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.naive_bayes import ComplementNB +from sklearn.pipeline import Pipeline + +pipeline = Pipeline( + [ + ("vect", TfidfVectorizer()), + ("clf", ComplementNB()), + ] +) +pipeline + +# %% +# We define a grid of hyperparameters to be explored by the +# :class:`~sklearn.model_selection.RandomizedSearchCV`. Using a +# :class:`~sklearn.model_selection.GridSearchCV` instead would explore all the +# possible combinations on the grid, which can be costly to compute, whereas the +# parameter `n_iter` of the :class:`~sklearn.model_selection.RandomizedSearchCV` +# controls the number of different random combinations that are evaluated. Notice +# that setting `n_iter` larger than the number of possible combinations in a +# grid would lead to repeating already-explored combinations. We search for the +# best parameter combination for both the feature extraction (`vect__`) and the +# classifier (`clf__`). + +import numpy as np + +parameter_grid = { + "vect__max_df": (0.2, 0.4, 0.6, 0.8, 1.0), + "vect__min_df": (1, 3, 5, 10), + "vect__ngram_range": ((1, 1), (1, 2)), # unigrams or bigrams + "vect__norm": ("l1", "l2"), + "clf__alpha": np.logspace(-6, 6, 13), +} + +# %% +# In this case `n_iter=40` is not an exhaustive search of the hyperparameters' +# grid. In practice it would be interesting to increase the parameter `n_iter` +# to get a more informative analysis. As a consequence, the computational time +# increases. We can reduce it by taking advantage of the parallelisation over +# the parameter combinations evaluation by increasing the number of CPUs used +# via the parameter `n_jobs`. 
+ +from pprint import pprint +from sklearn.model_selection import RandomizedSearchCV + +random_search = RandomizedSearchCV( + estimator=pipeline, + param_distributions=parameter_grid, + n_iter=40, + random_state=0, + n_jobs=2, + verbose=1, +) + +print("Performing grid search...") +print("Hyperparameters to be evaluated:") +pprint(parameter_grid) + +# %% +from time import time + +t0 = time() +random_search.fit(data_train.data, data_train.target) +print(f"Done in {time() - t0:.3f}s") + +# %% +print("Best parameters combination found:") +best_parameters = random_search.best_estimator_.get_params() +for param_name in sorted(parameter_grid.keys()): + print(f"{param_name}: {best_parameters[param_name]}") + +# %% +test_accuracy = random_search.score(data_test.data, data_test.target) +print( + "Accuracy of the best parameters using the inner CV of " + f"the random search: {random_search.best_score_:.3f}" +) +print(f"Accuracy on test set: {test_accuracy:.3f}") + +# %% +# The prefixes `vect` and `clf` are required to avoid possible ambiguities in +# the pipeline, but are not necessary for visualizing the results. Because of +# this, we define a function that will rename the tuned hyperparameters and +# improve the readability. + +import pandas as pd + + +def shorten_param(param_name): + """Remove components' prefixes in param_name.""" + if "__" in param_name: + return param_name.rsplit("__", 1)[1] + return param_name + + +cv_results = pd.DataFrame(random_search.cv_results_) +cv_results = cv_results.rename(shorten_param, axis=1) + +# %% +# We can use a `plotly.express.scatter +# `_ +# to visualize the trade-off between scoring time and mean test score (i.e. "CV +# score"). Passing the cursor over a given point displays the corresponding +# parameters. Error bars correspond to one standard deviation as computed in the +# different folds of the cross-validation. 
+ +import plotly.express as px + +param_names = [shorten_param(name) for name in parameter_grid.keys()] +labels = { + "mean_score_time": "CV Score time (s)", + "mean_test_score": "CV score (accuracy)", +} +fig = px.scatter( + cv_results, + x="mean_score_time", + y="mean_test_score", + error_x="std_score_time", + error_y="std_test_score", + hover_data=param_names, + labels=labels, +) +fig.update_layout( + title={ + "text": "trade-off between scoring time and mean test score", + "y": 0.95, + "x": 0.5, + "xanchor": "center", + "yanchor": "top", + } +) +fig + +# %% +# Notice that the cluster of models in the upper-left corner of the plot have +# the best trade-off between accuracy and scoring time. In this case, using +# bigrams increases the required scoring time without improving considerably the +# accuracy of the pipeline. +# +# .. note:: For more information on how to customize an automated tuning to +# maximize score and minimize scoring time, see the example notebook +# :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py`. +# +# We can also use a `plotly.express.parallel_coordinates +# `_ +# to further visualize the mean test score as a function of the tuned +# hyperparameters. This helps finding interactions between more than two +# hyperparameters and provide intuition on their relevance for improving the +# performance of a pipeline. +# +# We apply a `math.log10` transformation on the `alpha` axis to spread the +# active range and improve the readability of the plot. A value :math:`x` on +# said axis is to be understood as :math:`10^x`. 
+ +import math + +column_results = param_names + ["mean_test_score", "mean_score_time"] + +transform_funcs = dict.fromkeys(column_results, lambda x: x) +# Using a logarithmic scale for alpha +transform_funcs["alpha"] = math.log10 +# L1 norms are mapped to index 1, and L2 norms to index 2 +transform_funcs["norm"] = lambda x: 2 if x == "l2" else 1 +# Unigrams are mapped to index 1 and bigrams to index 2 +transform_funcs["ngram_range"] = lambda x: x[1] + +fig = px.parallel_coordinates( + cv_results[column_results].apply(transform_funcs), + color="mean_test_score", + color_continuous_scale=px.colors.sequential.Viridis_r, + labels=labels, +) +fig.update_layout( + title={ + "text": "Parallel coordinates plot of text classifier pipeline", + "y": 0.99, + "x": 0.5, + "xanchor": "center", + "yanchor": "top", + } +) +fig + +# %% +# The parallel coordinates plot displays the values of the hyperparameters on +# different columns while the performance metric is color coded. It is possible +# to select a range of results by clicking and holding on any axis of the +# parallel coordinate plot. You can then slide (move) the range selection and +# cross two selections to see the intersections. You can undo a selection by +# clicking once again on the same axis. +# +# In particular for this hyperparameter search, it is interesting to notice that +# the top performing models do not seem to depend on the regularization `norm`, +# but they do depend on a trade-off between `max_df`, `min_df` and the +# regularization strength `alpha`. The reason is that including noisy features +# (i.e. `max_df` close to :math:`1.0` or `min_df` close to :math:`0`) tend to +# overfit and therefore require a stronger regularization to compensate. Having +# less features require less regularization and less scoring time. +# +# The best accuracy scores are obtained when `alpha` is between :math:`10^{-6}` +# and :math:`10^0`, regardless of the hyperparameter `norm`. 
diff --git a/examples/text/plot_document_classification_20newsgroups.py b/examples/text/plot_document_classification_20newsgroups.py index 7eb14a94a724f..f86e2a534b6ec 100644 --- a/examples/text/plot_document_classification_20newsgroups.py +++ b/examples/text/plot_document_classification_20newsgroups.py @@ -357,7 +357,10 @@ def benchmark(clf, custom_name=False): # such a multi-class text classification problem. # # Notice that the most important hyperparameters values were tuned using a grid -# search procedure not shown in this notebook for the sake of simplicity. +# search procedure not shown in this notebook for the sake of simplicity. See +# the example script +# :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +# for a demo on how such tuning can be done. from sklearn.linear_model import LogisticRegression from sklearn.svm import LinearSVC diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index 594c074000597..58c4d25b461ca 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -47,6 +47,7 @@ "Pillow": ("7.1.2", "docs"), "sphinx-prompt": ("1.3.0", "docs"), "sphinxext-opengraph": ("0.4.2", "docs"), + "plotly": ("5.9.0", "docs, examples"), # XXX: Pin conda-lock to the latest released version (needs manual update # from time to time) "conda-lock": ("1.1.1", "maintenance"), diff --git a/sklearn/datasets/descr/twenty_newsgroups.rst b/sklearn/datasets/descr/twenty_newsgroups.rst index 3a327a4cbc19c..8e373c6ec3e74 100644 --- a/sklearn/datasets/descr/twenty_newsgroups.rst +++ b/sklearn/datasets/descr/twenty_newsgroups.rst @@ -215,9 +215,9 @@ It loses even more if we also strip this metadata from the training data: >>> metrics.f1_score(newsgroups_test.target, pred, average='macro') 0.76995... -Some other classifiers cope better with this harder version of the task. 
Try -running :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py` with and without -the ``--filter`` option to compare the results. +Some other classifiers cope better with this harder version of the task. Try the +:ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +example with and without the `remove` option to compare the results. .. topic:: Data Considerations @@ -248,6 +248,10 @@ the ``--filter`` option to compare the results. .. topic:: Examples - * :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py` + * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` * :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` + + * :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py` + + * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`