diff --git a/.binder/requirements.txt b/.binder/requirements.txt index 51ca95be6785e..bd2b70f5f43b0 100644 --- a/.binder/requirements.txt +++ b/.binder/requirements.txt @@ -1,4 +1,4 @@ ---find-links https://pypi.anaconda.org/scipy-wheels-nightly/simple/scikit-learn +--find-links https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/scikit-learn --pre matplotlib scikit-image @@ -7,3 +7,4 @@ seaborn Pillow sphinx-gallery scikit-learn +polars diff --git a/.circleci/config.yml b/.circleci/config.yml index 91f0ce0a92d8e..1f9a1a02e0f62 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,66 +1,104 @@ version: 2.1 -# Parameters required to trigger the execution -# of the "doc-min-dependencies" and "doc" jobs -parameters: - GITHUB_RUN_URL: - type: string - default: "none" - jobs: - doc-min-dependencies: + lint: docker: - - image: cimg/python:3.8.12 - environment: - - GITHUB_ARTIFACT_URL: << pipeline.parameters.GITHUB_RUN_URL >>/doc-min-dependencies.zip + - image: cimg/python:3.9.18 steps: - checkout - - run: bash build_tools/circle/download_documentation.sh - - store_artifacts: - path: doc/_build/html/stable - destination: doc + - run: + name: dependencies + command: | + source build_tools/shared.sh + # Include pytest compatibility with mypy + pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint + - run: + name: linting + command: ./build_tools/linting.sh - doc: + doc-min-dependencies: docker: - - image: cimg/python:3.8.12 + - image: cimg/python:3.9.18 environment: - - GITHUB_ARTIFACT_URL: << pipeline.parameters.GITHUB_RUN_URL >>/doc.zip + - MKL_NUM_THREADS: 2 + - OPENBLAS_NUM_THREADS: 2 + - CONDA_ENV_NAME: testenv + - LOCK_FILE: build_tools/circle/doc_min_dependencies_linux-64_conda.lock + # Do not fail if the documentation build generates warnings with minimum + # dependencies as long as we can avoid raising warnings with more recent + # versions of the same dependencies. + - SKLEARN_WARNINGS_AS_ERRORS: '0' steps: - checkout - - run: bash build_tools/circle/download_documentation.sh + - run: ./build_tools/circle/checkout_merge_commit.sh + - restore_cache: + key: v1-doc-min-deps-datasets-{{ .Branch }} + - restore_cache: + keys: + - doc-min-deps-ccache-{{ .Branch }} + - doc-min-deps-ccache + - run: ./build_tools/circle/build_doc.sh + - save_cache: + key: doc-min-deps-ccache-{{ .Branch }}-{{ .BuildNum }} + paths: + - ~/.ccache + - ~/.cache/pip + - save_cache: + key: v1-doc-min-deps-datasets-{{ .Branch }} + paths: + - ~/scikit_learn_data - store_artifacts: path: doc/_build/html/stable destination: doc - # Persists the generated documentation, so that it - # can be attached and deployed in the "deploy" job - - persist_to_workspace: - root: doc/_build/html - paths: . + - store_artifacts: + path: ~/log.txt + destination: log.txt - linux-arm64: - machine: - image: ubuntu-2004:202101-01 - resource_class: arm.medium + doc: + docker: + - image: cimg/python:3.9.18 environment: - - OMP_NUM_THREADS: 2 + - MKL_NUM_THREADS: 2 - OPENBLAS_NUM_THREADS: 2 - CONDA_ENV_NAME: testenv - - LOCK_FILE: build_tools/circle/py39_conda_forge_linux-aarch64_conda.lock + - LOCK_FILE: build_tools/circle/doc_linux-64_conda.lock + # Make sure that we fail if the documentation build generates warnings with + # recent versions of the dependencies. 
+ - SKLEARN_WARNINGS_AS_ERRORS: '1' steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh - restore_cache: - key: linux-arm64-{{ .Branch }} - - run: ./build_tools/circle/build_test_arm.sh + key: v1-doc-datasets-{{ .Branch }} + - restore_cache: + keys: + - doc-ccache-{{ .Branch }} + - doc-ccache + - run: ./build_tools/circle/build_doc.sh - save_cache: - key: linux-arm64-{{ .Branch }} + key: doc-ccache-{{ .Branch }}-{{ .BuildNum }} paths: - - ~/.cache/ccache + - ~/.ccache - ~/.cache/pip + - save_cache: + key: v1-doc-datasets-{{ .Branch }} + paths: - ~/scikit_learn_data + - store_artifacts: + path: doc/_build/html/stable + destination: doc + - store_artifacts: + path: ~/log.txt + destination: log.txt + # Persists generated documentation so that it can be attached and deployed + # in the 'deploy' step. + - persist_to_workspace: + root: doc/_build/html + paths: . + deploy: docker: - - image: cimg/python:3.8.12 + - image: cimg/python:3.9.18 steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh @@ -77,23 +115,15 @@ jobs: workflows: version: 2 - build-doc-and-deploy: - when: - not: - equal: [ "none", << pipeline.parameters.GITHUB_RUN_URL >> ] - # The jobs should run only when triggered by the workflow jobs: - - doc-min-dependencies - - doc + - lint + - doc: + requires: + - lint + - doc-min-dependencies: + requires: + - lint - deploy: requires: - doc - - linux-arm64: - when: - equal: [ "none", << pipeline.parameters.GITHUB_RUN_URL >> ] - # Prevent double execution of this job: on push - # by default and when triggered by the workflow - jobs: - - linux-arm64 diff --git a/.cirrus.star b/.cirrus.star new file mode 100644 index 0000000000000..f0b458d74289a --- /dev/null +++ b/.cirrus.star @@ -0,0 +1,37 @@ +# This script uses starlark for configuring when a cirrus CI job runs: +# https://cirrus-ci.org/guide/programming-tasks/ + +load("cirrus", "env", "fs", "http") + +def main(ctx): + # Only run for scikit-learn/scikit-learn. For debugging on a fork, you can + # comment out the following condition. + if env.get("CIRRUS_REPO_FULL_NAME") != "scikit-learn/scikit-learn": + return [] + + arm_wheel_yaml = "build_tools/cirrus/arm_wheel.yml" + arm_tests_yaml = "build_tools/cirrus/arm_tests.yml" + + # Nightly jobs always run + if env.get("CIRRUS_CRON", "") == "nightly": + return fs.read(arm_wheel_yaml) + fs.read(arm_tests_yaml) + + # Get commit message for event. We can not use `git` here because there is + # no command line access in starlark. Thus we need to query the GitHub API + # for the commit message. Note that `CIRRUS_CHANGE_MESSAGE` can not be used + # because it is set to the PR's title and not the latest commit message. 
+ SHA = env.get("CIRRUS_CHANGE_IN_REPO") + REPO = env.get("CIRRUS_REPO_FULL_NAME") + url = "https://api.github.com/repos/" + REPO + "/git/commits/" + SHA + response = http.get(url).json() + commit_msg = response["message"] + + jobs_to_run = "" + + if "[cd build]" in commit_msg or "[cd build cirrus]" in commit_msg: + jobs_to_run += fs.read(arm_wheel_yaml) + + if "[cirrus arm]" in commit_msg: + jobs_to_run += fs.read(arm_tests_yaml) + + return jobs_to_run diff --git a/.codecov.yml b/.codecov.yml index d430925ea7508..54ce77b9c1b0e 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -28,4 +28,7 @@ codecov: ignore: - "sklearn/externals" - "sklearn/_build_utils" +- "sklearn/__check_build" +- "sklearn/_min_dependencies.py" - "**/setup.py" +- "**/conftest.py" diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index b331e6899e91d..b261320543fa7 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -25,3 +25,12 @@ # PR 22983: Update to Black 22.3.0 d4aad64b1eb2e42e76f49db2ccfbe4b4660d092b + +# PR 26110: Update black to 23.3.0 +893d5accaf9d16f447645e704f85a216187564f7 + +# PR 26649: Add isort and ruff rules +42173fdb34b5aded79664e045cada719dfbe39dc + +# PR #28802: Update black to 24.3.0 +c4c546355667b070edd5c892b206aa4a97af9a0b diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000..f45e0f29ccfa2 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +.* export-ignore +asv_benchmarks export-ignore +azure-pipelines.yml export-ignore +benchmarks export-ignore +build_tools export-ignore +maint_tools export-ignore diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index df6843304f443..8d9c592ccdc13 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -9,9 +9,9 @@ contact_links: - name: Mailing list url: https://mail.python.org/mailman/listinfo/scikit-learn about: General discussions and announcements on the mailing list - - name: Gitter - url: https://gitter.im/scikit-learn/scikit-learn - about: Users and developers can sometimes be found on the gitter channel + - name: Discord server + url: https://discord.gg/h9qyrK8Jc8 + about: Developers and users can be found on the Discord server - name: Blank issue url: https://github.com/scikit-learn/scikit-learn/issues/new - about: Please note that Github Discussions should be used in most cases instead + about: Please note that GitHub Discussions should be used in most cases instead diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8528d5386b58a..f59f9bc2fbcd7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -26,7 +26,7 @@ review, either the pull request needs some benchmarking, tinkering, convincing, etc. or more likely the reviewers are simply busy. In either case, we ask for your understanding during the review process. For more information, see our FAQ on this topic: -http://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention. +https://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention. Thanks for contributing! --> diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py index ddf9bda3492de..9a689b8db09b4 100644 --- a/.github/scripts/label_title_regex.py +++ b/.github/scripts/label_title_regex.py @@ -1,10 +1,12 @@ """Labels PRs based on title. 
Must be run in a github action with the pull_request_target event.""" -from github import Github -import os + import json +import os import re +from github import Github + context_dict = json.loads(os.getenv("CONTEXT_GITHUB")) repo = context_dict["repository"] diff --git a/.github/workflows/artifact-redirector.yml b/.github/workflows/artifact-redirector.yml index 23336156470e1..3fdbc06fac386 100644 --- a/.github/workflows/artifact-redirector.yml +++ b/.github/workflows/artifact-redirector.yml @@ -1,13 +1,24 @@ +name: CircleCI artifacts redirector on: [status] + +# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this +# github actions workflow: +# https://docs.github.com/en/actions/security-guides/automatic-token-authentication +permissions: + statuses: write + jobs: circleci_artifacts_redirector_job: runs-on: ubuntu-latest + # For testing this action on a fork, remove the "github.repository =="" condition. + if: "github.repository == 'scikit-learn/scikit-learn' && github.event.context == 'ci/circleci: doc'" name: Run CircleCI artifacts redirector steps: - name: GitHub Action step uses: larsoner/circleci-artifacts-redirector-action@master with: repo-token: ${{ secrets.GITHUB_TOKEN }} + api-token: ${{ secrets.CIRCLECI_TOKEN }} artifact-path: 0/doc/_changed.html circleci-jobs: doc job-title: Check the rendered docs here! diff --git a/.github/workflows/assign.yml b/.github/workflows/assign.yml index f59935ab9f378..fa3b6f95a5e95 100644 --- a/.github/workflows/assign.yml +++ b/.github/workflows/assign.yml @@ -4,6 +4,12 @@ on: issue_comment: types: created +# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this +# github actions workflow: +# https://docs.github.com/en/actions/security-guides/automatic-token-authentication +permissions: + issues: write + jobs: one: runs-on: ubuntu-latest @@ -14,5 +20,8 @@ jobs: steps: - run: | echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" - curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees - curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -X "DELETE" https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels/help%20wanted + gh issue edit $ISSUE --add-assignee ${{ github.event.comment.user.login }} + gh issue edit $ISSUE --remove-label "help wanted" + env: + GH_TOKEN: ${{ github.token }} + ISSUE: ${{ github.event.issue.html_url }} diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml deleted file mode 100644 index a57abe7214504..0000000000000 --- a/.github/workflows/build-docs.yml +++ /dev/null @@ -1,75 +0,0 @@ -# Workflow to build the documentation -name: Documentation builder - -on: - push: - branches: - - main - # Release branches - - "[0-9]+.[0-9]+.X" - pull_request: - branches: - - main - - "[0-9]+.[0-9]+.X" - -jobs: - # Build the documentation against the minimum version of the dependencies - doc-min-dependencies: - # This prevents this workflow from running on a fork. - # To test this workflow on a fork, uncomment the following line. 
- if: github.repository == 'scikit-learn/scikit-learn' - - runs-on: ubuntu-latest - steps: - - name: Checkout scikit-learn - uses: actions/checkout@v3 - with: - # needed by build_doc.sh to compute the list of changed doc files: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.sha }} - - - name: Build documentation - run: bash build_tools/github/build_doc.sh - env: - OMP_NUM_THREADS: 2 - MKL_NUM_THREADS: 2 - CONDA_ENV_NAME: testenv - # Sphinx race condition in doc-min-dependencies is causing job to stall - # Here we run the job serially - SPHINX_NUMJOBS: 1 - LOCK_FILE: build_tools/github/doc_min_dependencies_linux-64_conda.lock - - - name: Upload documentation - uses: actions/upload-artifact@v3 - with: - name: doc-min-dependencies - path: doc/_build/html/stable - - # Build the documentation against the latest version of the dependencies - doc: - # This prevents this workflow from running on a fork. - # To test this workflow on a fork, uncomment the following line. - if: github.repository == 'scikit-learn/scikit-learn' - - runs-on: ubuntu-latest - steps: - - name: Checkout scikit-learn - uses: actions/checkout@v3 - with: - # needed by build_doc.sh to compute the list of changed doc files: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.sha }} - - - name: Build documentation - run: bash build_tools/github/build_doc.sh - env: - OMP_NUM_THREADS: 2 - MKL_NUM_THREADS: 2 - CONDA_ENV_NAME: testenv - LOCK_FILE: build_tools/github/doc_linux-64_conda.lock - - - name: Upload documentation - uses: actions/upload-artifact@v3 - with: - name: doc - path: doc/_build/html/stable diff --git a/.github/workflows/check-manifest.yml b/.github/workflows/check-sdist.yml similarity index 71% rename from .github/workflows/check-manifest.yml rename to .github/workflows/check-sdist.yml index 004cc452e385e..c02af711bdb6c 100644 --- a/.github/workflows/check-manifest.yml +++ b/.github/workflows/check-sdist.yml @@ -1,33 +1,33 @@ -name: "Check Manifest" +name: "Check sdist" on: schedule: - cron: '0 0 * * *' jobs: - check-manifest: + check-sdist: # Don't run on forks if: github.repository == 'scikit-learn/scikit-learn' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' - name: Install dependencies # scipy and cython are required to build sdist run: | python -m pip install --upgrade pip - pip install check-manifest scipy cython + pip install check-sdist - run: | - check-manifest -v + check-sdist --inject-junk update-tracker: uses: ./.github/workflows/update_tracking_issue.yml if: ${{ always() }} - needs: [check-manifest] + needs: [check-sdist] with: - job_status: ${{ needs.check-manifest.result }} + job_status: ${{ needs.check-sdist.result }} secrets: BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000000..4d38b22d71ab8 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,73 @@ +name: "CodeQL" + +on: + push: + branches: [ "main", "*.X" ] + pull_request: + branches: [ "main", "*.X" ] + schedule: + - cron: '0 6 * * 1' + +jobs: + analyze: + name: Analyze + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. 
+ runs-on: 'ubuntu-latest' + timeout-minutes: 360 + permissions: + # required for all workflows + security-events: write + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + language: [ 'javascript-typescript', 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
+ + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/labeler-module.yml b/.github/workflows/labeler-module.yml index 938b61f2e0cf9..468d3282903f2 100644 --- a/.github/workflows/labeler-module.yml +++ b/.github/workflows/labeler-module.yml @@ -3,11 +3,18 @@ on: pull_request_target: types: [opened] +# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this +# github actions workflow: +# https://docs.github.com/en/actions/security-guides/automatic-token-authentication +permissions: + contents: read + pull-requests: write + jobs: triage: runs-on: ubuntu-latest steps: - - uses: thomasjpfan/labeler@v2.5.0 + - uses: thomasjpfan/labeler@v2.5.1 continue-on-error: true if: github.repository == 'scikit-learn/scikit-learn' with: @@ -18,7 +25,7 @@ jobs: triage_file_extensions: runs-on: ubuntu-latest steps: - - uses: thomasjpfan/labeler@v2.5.0 + - uses: thomasjpfan/labeler@v2.5.1 continue-on-error: true if: github.repository == 'scikit-learn/scikit-learn' with: diff --git a/.github/workflows/labeler-title-regex.yml b/.github/workflows/labeler-title-regex.yml index 85ce19714758e..10195eca13a73 100644 --- a/.github/workflows/labeler-title-regex.yml +++ b/.github/workflows/labeler-title-regex.yml @@ -3,6 +3,9 @@ on: pull_request_target: types: [opened, edited] +# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this +# github actions workflow: +# https://docs.github.com/en/actions/security-guides/automatic-token-authentication permissions: contents: read pull-requests: write @@ -13,7 +16,7 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' - name: Install PyGithub diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000000..fdc993c1b3fdd --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,103 @@ +# This linter job on GH actions is used to trigger the commenter bot +# in bot-lint-comment.yml file. It stores the output of the linter to be used +# by the commenter bot. +name: linter + +on: + - pull_request_target + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + + # setting any permission will set everything else to none for GITHUB_TOKEN + permissions: + pull-requests: none + + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install dependencies + run: | + source build_tools/shared.sh + # Include pytest compatibility with mypy + pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint + # we save the versions of the linters to be used in the error message later. 
+ python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"black={version('black')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"cython-lint={version('cython-lint')}\")" >> /tmp/versions.txt + + - name: Run linting + id: lint-script + # We download the linting script from main, since this workflow is run + # from main itself. + run: | + curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/linting.sh --retry 5 -o ./build_tools/linting.sh + set +e + ./build_tools/linting.sh &> /tmp/linting_output.txt + cat /tmp/linting_output.txt + + - name: Upload Artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: lint-log + path: | + /tmp/linting_output.txt + /tmp/versions.txt + retention-days: 1 + + comment: + needs: lint + if: ${{ !cancelled() }} + runs-on: ubuntu-latest + + # We need these permissions to be able to post / update comments + permissions: + pull-requests: write + issues: write + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + + - name: Install dependencies + run: python -m pip install requests + + - name: Download artifact + id: download-artifact + uses: actions/download-artifact@v3 + with: + name: lint-log + + - name: Print log + run: cat linting_output.txt + + - name: Process Comments + id: process-comments + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + BRANCH_SHA: ${{ github.event.pull_request.head.sha }} + RUN_ID: ${{ github.run_id }} + LOG_FILE: linting_output.txt + VERSIONS_FILE: versions.txt + run: python ./build_tools/get_comment.py diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index cca5c3f6adf73..b8940ae133ad9 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -13,9 +13,13 @@ on: jobs: publish: runs-on: ubuntu-latest + environment: publish_pypi + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.8' - name: Install dependencies @@ -35,15 +39,10 @@ jobs: run: | python build_tools/github/check_wheels.py - name: Publish package to TestPyPI - uses: pypa/gh-action-pypi-publish@v1.4.1 + uses: pypa/gh-action-pypi-publish@v1.8.5 with: - user: __token__ - password: ${{ secrets.TEST_PYPI_TOKEN }} repository_url: https://test.pypi.org/legacy/ if: ${{ github.event.inputs.pypi_repo == 'testpypi' }} - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@v1.4.1 - with: - user: __token__ - password: ${{ secrets.PYPI_TOKEN }} + uses: pypa/gh-action-pypi-publish@v1.8.5 if: ${{ github.event.inputs.pypi_repo == 'pypi' }} diff --git a/.github/workflows/trigger-hosting.yml b/.github/workflows/trigger-hosting.yml deleted file mode 100644 index 456ce68722e42..0000000000000 --- a/.github/workflows/trigger-hosting.yml +++ /dev/null @@ -1,30 +0,0 @@ -# Workflow to trigger the jobs that will host the documentation -name: Documentation push trigger -on: - workflow_run: - # Run the workflow after the separate "Documentation builder" workflow completes - workflows: 
[Documentation builder] - types: - - completed - -jobs: - push: - runs-on: ubuntu-latest - # Run the job only if the "Documentation builder" workflow succeeded - # Prevents this workflow from running on a fork. - # To test this workflow on a fork remove the `github.repository == scikit-learn/scikit-learn` condition - if: github.repository == 'scikit-learn/scikit-learn' && github.event.workflow_run.conclusion == 'success' - steps: - - name: Checkout scikit-learn - uses: actions/checkout@v3 - - - name: Trigger hosting jobs - run: bash build_tools/github/trigger_hosting.sh - env: - CIRCLE_CI_TOKEN: ${{ secrets.CIRCLE_CI_TOKEN }} - EVENT: ${{ github.event.workflow_run.event }} - RUN_ID: ${{ github.event.workflow_run.id }} - HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }} - COMMIT_SHA: ${{ github.event.workflow_run.head_sha }} - REPO_NAME: ${{ github.event.workflow_run.head_repository.full_name }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/twitter.yml b/.github/workflows/twitter.yml deleted file mode 100644 index 96b32ec902efa..0000000000000 --- a/.github/workflows/twitter.yml +++ /dev/null @@ -1,26 +0,0 @@ -# Tweet the URL of a commit on @sklearn_commits whenever a push event -# happens on the main branch -name: Twitter Push Notification - - -on: - push: - branches: - - main - - -jobs: - tweet: - name: Twitter Notification - runs-on: ubuntu-latest - steps: - - name: Tweet URL of last commit as @sklearn_commits - if: github.repository == 'scikit-learn/scikit-learn' - uses: docker://thomasjpfan/twitter-action:0.3 - with: - args: "-message \"https://github.com/scikit-learn/scikit-learn/commit/${{ github.sha }}\"" - env: - TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} - TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} - TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} - TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }} diff --git a/.github/workflows/unassign.yml b/.github/workflows/unassign.yml index 0f4e78478b810..94a50d49839d6 100644 --- a/.github/workflows/unassign.yml +++ b/.github/workflows/unassign.yml @@ -4,6 +4,12 @@ on: issues: types: unassigned +# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this +# github actions workflow: +# https://docs.github.com/en/actions/security-guides/automatic-token-authentication +permissions: + issues: write + jobs: one: runs-on: ubuntu-latest @@ -12,4 +18,7 @@ jobs: if: github.event.issue.state == 'open' run: | echo "Marking issue ${{ github.event.issue.number }} as help wanted" - curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"labels": ["help wanted"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/labels + gh issue edit $ISSUE --add-label "help wanted" + env: + GH_TOKEN: ${{ github.token }} + ISSUE: ${{ github.event.issue.html_url }} diff --git a/.github/workflows/update-lock-files.yml b/.github/workflows/update-lock-files.yml new file mode 100644 index 0000000000000..50d62c85d00a6 --- /dev/null +++ b/.github/workflows/update-lock-files.yml @@ -0,0 +1,71 @@ +# Workflow to update lock files +name: Update lock files + +on: + workflow_dispatch: + schedule: + - cron: '0 5 * * 1' + +jobs: + update_lock_files: + if: github.repository == 'scikit-learn/scikit-learn' + runs-on: ubuntu-latest + + strategy: + # Ensure that each build will continue even if one build in the matrix fails + fail-fast: false + matrix: + include: + - name: main + update_script_args: "--select-tag main-ci" + 
additional_commit_message: "[doc build]" + - name: scipy-dev + update_script_args: "--select-tag scipy-dev" + additional_commit_message: "[scipy-dev]" + - name: cirrus-arm + update_script_args: "--select-tag arm" + additional_commit_message: "[cirrus arm]" + - name: pypy + update_script_args: "--select-tag pypy" + additional_commit_message: "[pypy]" + + steps: + - uses: actions/checkout@v4 + - name: Generate lock files + run: | + source build_tools/shared.sh + source $CONDA/bin/activate + conda install -n base conda conda-libmamba-solver -y + conda config --set solver libmamba + conda install -c conda-forge "$(get_dep conda-lock min)" -y + + python build_tools/update_environments_and_lock_files.py ${{ matrix.update_script_args }} + + - name: Create Pull Request + id: cpr + uses: peter-evans/create-pull-request@v5 + with: + token: ${{ secrets.BOT_GITHUB_TOKEN }} + push-to-fork: scikit-learn-bot/scikit-learn + commit-message: Update CI lock files ${{ matrix.additional_commit_message }} + committer: "Lock file bot " + author: "Lock file bot " + delete-branch: true + branch: auto-update-lock-files-${{ matrix.name }} + title: ":lock: :robot: CI Update lock files for ${{ matrix.name }} CI build(s) :lock: :robot:" + body: | + Update lock files. + + ### Note + If the CI tasks fail, create a new branch based on this PR and add the required fixes to that branch. + + - name: Check Pull Request + if: steps.cpr.outputs.pull-request-number != '' + run: | + echo "### :rocket: Pull-Request Summary" >> ${GITHUB_STEP_SUMMARY} + echo "" >> ${GITHUB_STEP_SUMMARY} + echo "The following lock files pull-request has been auto-generated:" + echo "- **PR** #${{ steps.cpr.outputs.pull-request-number }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **URL** ${{ steps.cpr.outputs.pull-request-url }}" >> ${GITHUB_STEP_SUMMARY} + echo "- **Operation** [${{ steps.cpr.outputs.pull-request-operation }}]" >> ${GITHUB_STEP_SUMMARY} + echo "- **SHA** ${{ steps.cpr.outputs.pull-request-head-sha }}" >> ${GITHUB_STEP_SUMMARY} diff --git a/.github/workflows/update_tracking_issue.yml b/.github/workflows/update_tracking_issue.yml index 124ea1e8c6ac4..d4538fe6848d8 100644 --- a/.github/workflows/update_tracking_issue.yml +++ b/.github/workflows/update_tracking_issue.yml @@ -27,7 +27,7 @@ jobs: if: github.repository == 'scikit-learn/scikit-learn' && github.event_name == 'schedule' steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' - name: Update tracking issue on GitHub diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 49da927d67178..8e0073e67426b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -40,7 +40,7 @@ jobs: name: Check build trigger run: bash build_tools/github/check_build_trigger.sh - # Build the wheels for Linux, Windows and macOS for Python 3.8 and newer + # Build the wheels for Linux, Windows and macOS for Python 3.9 and newer build_wheels: name: Build wheel for cp${{ matrix.python }}-${{ matrix.platform_id }}-${{ matrix.manylinux_image }} runs-on: ${{ matrix.os }} @@ -53,11 +53,6 @@ jobs: matrix: include: # Window 64 bit - # Note: windows-2019 is needed for older Python versions: - # https://github.com/scikit-learn/scikit-learn/issues/22530 - - os: windows-2019 - python: 38 - platform_id: win_amd64 - os: windows-latest python: 39 platform_id: win_amd64 @@ -67,12 +62,11 @@ jobs: - os: windows-latest python: 311 platform_id: win_amd64 + - os: windows-latest + python: 312 + platform_id: 
win_amd64 # Linux 64 bit manylinux2014 - - os: ubuntu-latest - python: 38 - platform_id: manylinux_x86_64 - manylinux_image: manylinux2014 - os: ubuntu-latest python: 39 platform_id: manylinux_x86_64 @@ -88,65 +82,97 @@ jobs: python: 311 platform_id: manylinux_x86_64 manylinux_image: manylinux2014 + - os: ubuntu-latest + python: 312 + platform_id: manylinux_x86_64 + manylinux_image: manylinux2014 # MacOS x86_64 - - os: macos-latest - python: 38 - platform_id: macosx_x86_64 - - os: macos-latest + - os: macos-12 python: 39 platform_id: macosx_x86_64 - - os: macos-latest + - os: macos-12 python: 310 platform_id: macosx_x86_64 - - os: macos-latest + - os: macos-12 python: 311 platform_id: macosx_x86_64 + - os: macos-12 + python: 312 + platform_id: macosx_x86_64 # MacOS arm64 - - os: macos-latest - python: 38 - platform_id: macosx_arm64 - - os: macos-latest + - os: macos-14 python: 39 platform_id: macosx_arm64 - - os: macos-latest + - os: macos-14 python: 310 platform_id: macosx_arm64 - - os: macos-latest + - os: macos-14 python: 311 platform_id: macosx_arm64 + - os: macos-14 + python: 312 + platform_id: macosx_arm64 steps: - name: Checkout scikit-learn uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.9' # update once build dependencies are available + python-version: "3.11" # update once build dependencies are available + + - name: Install conda for macos arm64 + if: ${{ matrix.platform_id == 'macosx_arm64' }} + run: | + set -ex + # macos arm64 runners do not have conda installed. Thus we must install conda manually + EXPECTED_SHA="dd832d8a65a861b5592b2cf1d55f26031f7c1491b30321754443931e7b1e6832" + MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-MacOSX-arm64.sh" + curl -L --retry 10 $MINIFORGE_URL -o miniforge.sh + + # Check SHA + file_sha=$(shasum -a 256 miniforge.sh | awk '{print $1}') + if [ "$EXPECTED_SHA" != "$file_sha" ]; then + echo "SHA values did not match!"
+ exit 1 fi + + # Install miniforge + MINIFORGE_PATH=$HOME/miniforge + bash ./miniforge.sh -b -p $MINIFORGE_PATH + echo "$MINIFORGE_PATH/bin" >> $GITHUB_PATH + echo "CONDA_HOME=$MINIFORGE_PATH" >> $GITHUB_ENV + + - name: Set conda environment for non-macos arm64 environments + if: ${{ matrix.platform_id != 'macosx_arm64' }} + run: | + # Non-macos arm64 environments already have conda installed + echo "CONDA_HOME=/usr/local/miniconda" >> $GITHUB_ENV - name: Build and test wheels env: - CONFTEST_PATH: ${{ github.workspace }}/conftest.py - CONFTEST_NAME: conftest.py - CIBW_ENVIRONMENT: OMP_NUM_THREADS=2 - OPENBLAS_NUM_THREADS=2 - SKLEARN_SKIP_NETWORK_TESTS=1 - SKLEARN_BUILD_PARALLEL=3 + CIBW_PRERELEASE_PYTHONS: ${{ matrix.prerelease }} + CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1 SKLEARN_BUILD_PARALLEL=3 CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} CIBW_ARCHS: all CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux_image }} CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux_image }} - CIBW_TEST_SKIP: "*-macosx_arm64" + # Needed on Windows CI to compile with Visual Studio compiler + # otherwise Meson detects a MINGW64 platform and use MINGW64 + # toolchain + CIBW_CONFIG_SETTINGS_WINDOWS: "setup-args=--vsenv" CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: bash build_tools/github/repair_windows_wheels.sh {wheel} {dest_dir} CIBW_BEFORE_TEST_WINDOWS: bash build_tools/github/build_minimal_windows_image.sh ${{ matrix.python }} - CIBW_TEST_REQUIRES: pytest pandas threadpoolctl - CIBW_TEST_COMMAND: bash {project}/build_tools/github/test_wheels.sh + CIBW_TEST_REQUIRES: pytest pandas ${{ matrix.python == 312 && 'numpy>=2.0.0rc2' || '' }} + CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh CIBW_TEST_COMMAND_WINDOWS: bash {project}/build_tools/github/test_windows_wheels.sh ${{ matrix.python }} CIBW_BUILD_VERBOSITY: 1 - run: bash build_tools/github/build_wheels.sh + run: bash build_tools/wheels/build_wheels.sh - name: Store artifacts uses: actions/upload-artifact@v3 @@ -174,9 +200,9 @@ jobs: uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.9' # update once build dependencies are available + python-version: "3.9" # update once build dependencies are available - name: Build source distribution run: bash build_tools/github/build_source.sh @@ -186,8 +212,6 @@ jobs: - name: Test source distribution run: bash build_tools/github/test_source.sh env: - OMP_NUM_THREADS: 2 - OPENBLAS_NUM_THREADS: 2 SKLEARN_SKIP_NETWORK_TESTS: 1 - name: Store artifacts @@ -199,6 +223,7 @@ jobs: upload_anaconda: name: Upload to Anaconda runs-on: ubuntu-latest + environment: upload_anaconda needs: [build_wheels, build_sdist] # The artifacts cannot be uploaded on PRs if: github.event_name != 'pull_request' @@ -213,12 +238,13 @@ jobs: path: dist - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 - name: Upload artifacts env: # Secret variables need to be mapped to environment variables explicitly SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN }} SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_STAGING_UPLOAD_TOKEN }} + ARTIFACTS_PATH: dist/artifact # Force a replacement if the remote file already exists run: bash build_tools/github/upload_anaconda.sh diff --git a/.gitignore b/.gitignore index 47ec8fa2faf79..9f3b453bbfd74 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ sklearn/**/*.html dist/ MANIFEST +doc/sg_execution_times.rst
doc/_build/ doc/auto_examples/ doc/modules/generated/ @@ -53,11 +54,15 @@ nips2010_pdf/ examples/cluster/joblib reuters/ benchmarks/bench_covertype_data/ +benchmarks/HIGGS.csv.gz +bench_pca_solvers.csv *.prefs .pydevproject .idea .vscode +# used by pyenv +.python-version *.c *.cpp @@ -85,10 +90,12 @@ sklearn/utils/_seq_dataset.pxd sklearn/utils/_weight_vector.pyx sklearn/utils/_weight_vector.pxd sklearn/linear_model/_sag_fast.pyx +sklearn/linear_model/_sgd_fast.pyx sklearn/metrics/_dist_metrics.pyx sklearn/metrics/_dist_metrics.pxd sklearn/metrics/_pairwise_distances_reduction/_argkmin.pxd sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx +sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx sklearn/metrics/_pairwise_distances_reduction/_base.pxd sklearn/metrics/_pairwise_distances_reduction/_base.pyx sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd @@ -97,3 +104,10 @@ sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pxd sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx +sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors_classmode.pyx +sklearn/neighbors/_ball_tree.pyx +sklearn/neighbors/_binary_tree.pxi +sklearn/neighbors/_kd_tree.pyx + +# Default JupyterLite content +jupyterlite_contents diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e5a6018df4473..31af43b6bbab0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,18 +5,25 @@ repos: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.2.1 + hooks: + - id: ruff + args: ["--fix", "--output-format=full"] - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 24.3.0 hooks: - id: black -- repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - types: [file, python] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 + rev: v1.9.0 hooks: - id: mypy files: sklearn/ additional_dependencies: [pytest==6.2.4] +- repo: https://github.com/MarcoGorelli/cython-lint + rev: v0.15.0 + hooks: + # TODO: add the double-quote-cython-strings hook when its usability has improved: + # possibility to pass a directory and use it as a check instead of auto-formatter. + - id: cython-lint diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4f0bd8def013e..0000000000000 --- a/.travis.yml +++ /dev/null @@ -1,93 +0,0 @@ -# Make it explicit that we favor the -# new container-based Travis workers -language: python -dist: xenial -# Only used to install cibuildwheel, CIBW_BUILD determines the python version being -# built in the docker image itself. Also: travis does not have 3.10 yet.
-python: 3.9 - -cache: - apt: true - directories: - - $HOME/.cache/pip - - $HOME/.ccache - -env: - global: - - CPU_COUNT=3 - - TEST_DIR=/tmp/sklearn # Test directory for continuous integration jobs - - PYTEST_VERSION=latest - - OMP_NUM_THREADS=2 - - OPENBLAS_NUM_THREADS=2 - - SKLEARN_BUILD_PARALLEL=3 - - SKLEARN_SKIP_NETWORK_TESTS=1 - - PYTHONUNBUFFERED=1 - # Custom environment variables for the ARM wheel builder - - CIBW_BUILD_VERBOSITY=1 - - CIBW_TEST_COMMAND="bash {project}/build_tools/travis/test_wheels.sh" - - CIBW_ENVIRONMENT="CPU_COUNT=4 - OMP_NUM_THREADS=2 - OPENBLAS_NUM_THREADS=2 - SKLEARN_BUILD_PARALLEL=10 - SKLEARN_SKIP_NETWORK_TESTS=1 - PYTHONUNBUFFERED=1" - -jobs: - include: - # Linux environments to build the scikit-learn wheels for the ARM64 - # architecture and Python 3.8 and newer. This is used both at release time - # with the manual trigger in the commit message in the release branch and as - # a scheduled task to build the weekly dev build on the main branch. The - # weekly frequency is meant to avoid depleting the Travis CI credits too - # fast. - - os: linux - arch: arm64-graviton2 - dist: focal - virt: vm - group: edge - if: type = cron or commit_message =~ /\[cd build\]/ - env: - - CIBW_BUILD=cp38-manylinux_aarch64 - - BUILD_WHEEL=true - - - os: linux - arch: arm64-graviton2 - dist: focal - virt: vm - group: edge - if: type = cron or commit_message =~ /\[cd build\]/ - env: - - CIBW_BUILD=cp39-manylinux_aarch64 - - BUILD_WHEEL=true - - - os: linux - arch: arm64-graviton2 - dist: focal - virt: vm - group: edge - if: type = cron or commit_message =~ /\[cd build\]/ - env: - - CIBW_BUILD=cp310-manylinux_aarch64 - - BUILD_WHEEL=true - - - os: linux - arch: arm64-graviton2 - dist: focal - virt: vm - group: edge - if: type = cron or commit_message =~ /\[cd build\]/ - env: - - CIBW_BUILD=cp311-manylinux_aarch64 - - BUILD_WHEEL=true - -install: source build_tools/travis/install.sh || travis_terminate 1 -script: source build_tools/travis/script.sh || travis_terminate 1 -after_success: source build_tools/travis/after_success.sh || travis_terminate 1 - -notifications: - webhooks: - urls: - - https://webhooks.gitter.im/e/4ffabb4df010b70cd624 - on_success: change - on_failure: always - on_start: never diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f6f65883c65b2..92a673462e3a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,7 +18,7 @@ Documentation can be found under the But there are many other ways to help. In particular answering queries on the [issue tracker](https://github.com/scikit-learn/scikit-learn/issues), investigating bugs, and [reviewing other developers' pull -requests](http://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines) +requests](https://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines) are very valuable contributions that decrease the burden on the project maintainers. @@ -30,8 +30,8 @@ link to it from your website, or simply star it in GitHub to say "I use it". 
Quick links ----------- -* [Submitting a bug report or feature request](http://scikit-learn.org/dev/developers/contributing.html#submitting-a-bug-report-or-a-feature-request) -* [Contributing code](http://scikit-learn.org/dev/developers/contributing.html#contributing-code) +* [Submitting a bug report or feature request](https://scikit-learn.org/dev/developers/contributing.html#submitting-a-bug-report-or-a-feature-request) +* [Contributing code](https://scikit-learn.org/dev/developers/contributing.html#contributing-code) * [Coding guidelines](https://scikit-learn.org/dev/developers/develop.html#coding-guidelines) * [Tips to read current code](https://scikit-learn.org/dev/developers/contributing.html#reading-the-existing-code-base) diff --git a/COPYING b/COPYING index bddf6ed887ce9..e1cd01d584578 100644 --- a/COPYING +++ b/COPYING @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2007-2022 The scikit-learn developers. +Copyright (c) 2007-2024 The scikit-learn developers. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/MANIFEST.in b/MANIFEST.in index 11e5bdce02988..1596d4cd011df 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,6 @@ include *.rst +include *.build +recursive-include sklearn *.build recursive-include doc * recursive-include examples * recursive-include sklearn *.c *.cpp *.h *.pyx *.pxd *.pxi *.tp @@ -22,6 +24,7 @@ recursive-exclude maint_tools * recursive-exclude benchmarks * recursive-exclude .binder * recursive-exclude .circleci * +exclude .cirrus.star exclude .codecov.yml exclude .git-blame-ignore-revs exclude .mailmap diff --git a/Makefile b/Makefile index 5ea64dc0d6cac..52374ba44ff79 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,12 @@ in: inplace # just a shortcut inplace: $(PYTHON) setup.py build_ext -i +dev-meson: + pip install --verbose --no-build-isolation --editable . --config-settings editable-verbose=true + +clean-meson: + pip uninstall -y scikit-learn + test-code: in $(PYTEST) --showlocals -v sklearn --durations=20 test-sphinxext: @@ -61,5 +67,4 @@ doc-noplot: inplace $(MAKE) -C doc html-noplot code-analysis: - flake8 sklearn | grep -v __init__ | grep -v external - pylint -E -i y sklearn/ -d E1103,E0611,E1101 + build_tools/linting.sh diff --git a/README.rst b/README.rst index 364d45866636e..4ac297063c26e 100644 --- a/README.rst +++ b/README.rst @@ -1,48 +1,48 @@ .. -*- mode: rst -*- -|Azure|_ |Travis|_ |Codecov|_ |CircleCI|_ |Nightly wheels|_ |Black|_ |PythonVersion|_ |PyPi|_ |DOI|_ |Benchmark|_ +|Azure| |CirrusCI| |Codecov| |CircleCI| |Nightly wheels| |Black| |PythonVersion| |PyPi| |DOI| |Benchmark| .. |Azure| image:: https://dev.azure.com/scikit-learn/scikit-learn/_apis/build/status/scikit-learn.scikit-learn?branchName=main -.. _Azure: https://dev.azure.com/scikit-learn/scikit-learn/_build/latest?definitionId=1&branchName=main + :target: https://dev.azure.com/scikit-learn/scikit-learn/_build/latest?definitionId=1&branchName=main -.. |CircleCI| image:: https://circleci.com/gh/scikit-learn/scikit-learn/tree/main.svg?style=shield&circle-token=:circle-token -.. _CircleCI: https://circleci.com/gh/scikit-learn/scikit-learn +.. |CircleCI| image:: https://circleci.com/gh/scikit-learn/scikit-learn/tree/main.svg?style=shield + :target: https://circleci.com/gh/scikit-learn/scikit-learn -.. |Travis| image:: https://api.travis-ci.com/scikit-learn/scikit-learn.svg?branch=main -.. _Travis: https://app.travis-ci.com/github/scikit-learn/scikit-learn +.. 
|CirrusCI| image:: https://img.shields.io/cirrus/github/scikit-learn/scikit-learn/main?label=Cirrus%20CI + :target: https://cirrus-ci.com/github/scikit-learn/scikit-learn/main .. |Codecov| image:: https://codecov.io/gh/scikit-learn/scikit-learn/branch/main/graph/badge.svg?token=Pk8G9gg3y9 -.. _Codecov: https://codecov.io/gh/scikit-learn/scikit-learn + :target: https://codecov.io/gh/scikit-learn/scikit-learn .. |Nightly wheels| image:: https://github.com/scikit-learn/scikit-learn/workflows/Wheel%20builder/badge.svg?event=schedule -.. _`Nightly wheels`: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule + :target: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule -.. |PythonVersion| image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue -.. _PythonVersion: https://pypi.org/project/scikit-learn/ +.. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/scikit-learn.svg + :target: https://pypi.org/project/scikit-learn/ .. |PyPi| image:: https://img.shields.io/pypi/v/scikit-learn -.. _PyPi: https://pypi.org/project/scikit-learn + :target: https://pypi.org/project/scikit-learn .. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg -.. _Black: https://github.com/psf/black + :target: https://github.com/psf/black .. |DOI| image:: https://zenodo.org/badge/21369/scikit-learn/scikit-learn.svg -.. _DOI: https://zenodo.org/badge/latestdoi/21369/scikit-learn/scikit-learn + :target: https://zenodo.org/badge/latestdoi/21369/scikit-learn/scikit-learn .. |Benchmark| image:: https://img.shields.io/badge/Benchmarked%20by-asv-blue -.. _`Benchmark`: https://scikit-learn.org/scikit-learn-benchmarks/ - -.. |PythonMinVersion| replace:: 3.8 -.. |NumPyMinVersion| replace:: 1.17.3 -.. |SciPyMinVersion| replace:: 1.3.2 -.. |JoblibMinVersion| replace:: 1.1.1 -.. |ThreadpoolctlMinVersion| replace:: 2.0.0 -.. |MatplotlibMinVersion| replace:: 3.1.3 -.. |Scikit-ImageMinVersion| replace:: 0.16.2 -.. |PandasMinVersion| replace:: 1.0.5 + :target: https://scikit-learn.org/scikit-learn-benchmarks + +.. |PythonMinVersion| replace:: 3.9 +.. |NumPyMinVersion| replace:: 1.19.5 +.. |SciPyMinVersion| replace:: 1.6.0 +.. |JoblibMinVersion| replace:: 1.2.0 +.. |ThreadpoolctlMinVersion| replace:: 3.1.0 +.. |MatplotlibMinVersion| replace:: 3.3.4 +.. |Scikit-ImageMinVersion| replace:: 0.17.2 +.. |PandasMinVersion| replace:: 1.1.5 .. |SeabornMinVersion| replace:: 0.9.0 -.. |PytestMinVersion| replace:: 5.3.1 -.. |PlotlyMinVersion| replace:: 5.10.0 +.. |PytestMinVersion| replace:: 7.1.2 +.. |PlotlyMinVersion| replace:: 5.14.0 .. image:: https://raw.githubusercontent.com/scikit-learn/scikit-learn/main/doc/logos/scikit-learn-logo.png :target: https://scikit-learn.org/ @@ -80,7 +80,7 @@ scikit-learn 1.0 and later require Python 3.7 or newer. scikit-learn 1.1 and later require Python 3.8 or newer. Scikit-learn plotting capabilities (i.e., functions start with ``plot_`` and -classes end with "Display") require Matplotlib (>= |MatplotlibMinVersion|). +classes end with ``Display``) require Matplotlib (>= |MatplotlibMinVersion|). For running the examples Matplotlib >= |MatplotlibMinVersion| is required. 
A few examples require scikit-image >= |Scikit-ImageMinVersion|, a few examples require pandas >= |PandasMinVersion|, some examples require seaborn >= @@ -89,7 +89,7 @@ require pandas >= |PandasMinVersion|, some examples require seaborn >= User installation ~~~~~~~~~~~~~~~~~ -If you already have a working installation of numpy and scipy, +If you already have a working installation of NumPy and SciPy, the easiest way to install scikit-learn is using ``pip``:: pip install -U scikit-learn @@ -184,20 +184,21 @@ Communication ~~~~~~~~~~~~~ - Mailing list: https://mail.python.org/mailman/listinfo/scikit-learn -- Gitter: https://gitter.im/scikit-learn/scikit-learn - Logos & Branding: https://github.com/scikit-learn/scikit-learn/tree/main/doc/logos - Blog: https://blog.scikit-learn.org - Calendar: https://blog.scikit-learn.org/calendar/ - Twitter: https://twitter.com/scikit_learn -- Twitter (commits): https://twitter.com/sklearn_commits - Stack Overflow: https://stackoverflow.com/questions/tagged/scikit-learn -- Github Discussions: https://github.com/scikit-learn/scikit-learn/discussions +- GitHub Discussions: https://github.com/scikit-learn/scikit-learn/discussions - Website: https://scikit-learn.org - LinkedIn: https://www.linkedin.com/company/scikit-learn - YouTube: https://www.youtube.com/channel/UCJosFjYm0ZYVUARxuOZqnnw/playlists - Facebook: https://www.facebook.com/scikitlearnofficial/ - Instagram: https://www.instagram.com/scikitlearnofficial/ - TikTok: https://www.tiktok.com/@scikit.learn +- Mastodon: https://mastodon.social/@sklearn@fosstodon.org +- Discord: https://discord.gg/h9qyrK8Jc8 + Citation ~~~~~~~~ diff --git a/SECURITY.md b/SECURITY.md index 1c9c607a8be30..18bb99ea3c15c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,10 +2,10 @@ ## Supported Versions -| Version | Supported | -| --------- | ------------------ | -| 1.1.3 | :white_check_mark: | -| < 1.1.3 | :x: | +| Version | Supported | +| ------------- | ------------------ | +| 1.4.2 | :white_check_mark: | +| < 1.4.2 | :x: | ## Reporting a Vulnerability diff --git a/asv_benchmarks/asv.conf.json b/asv_benchmarks/asv.conf.json index 9f65d194b6d84..3392925d7a488 100644 --- a/asv_benchmarks/asv.conf.json +++ b/asv_benchmarks/asv.conf.json @@ -71,13 +71,17 @@ // pip (with all the conda available packages installed first, // followed by the pip installed packages). // + // The versions of the dependencies should be bumped in a dedicated commit + // to easily identify regressions/improvements due to code changes from + // those due to dependency changes. 
+ // "matrix": { - "numpy": [], - "scipy": [], - "cython": [], - "joblib": [], - "threadpoolctl": [], - "pandas": [] + "numpy": ["1.25.2"], + "scipy": ["1.11.2"], + "cython": ["3.0.10"], + "joblib": ["1.3.2"], + "threadpoolctl": ["3.2.0"], + "pandas": ["2.1.0"] }, // Combinations of libraries/python versions can be excluded/included diff --git a/asv_benchmarks/benchmarks/cluster.py b/asv_benchmarks/benchmarks/cluster.py index ba460e6b503a6..457a15dd938e9 100644 --- a/asv_benchmarks/benchmarks/cluster.py +++ b/asv_benchmarks/benchmarks/cluster.py @@ -1,7 +1,7 @@ from sklearn.cluster import KMeans, MiniBatchKMeans from .common import Benchmark, Estimator, Predictor, Transformer -from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset +from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset from .utils import neg_mean_inertia diff --git a/asv_benchmarks/benchmarks/common.py b/asv_benchmarks/benchmarks/common.py index c3e114a212047..c12da551010f6 100644 --- a/asv_benchmarks/benchmarks/common.py +++ b/asv_benchmarks/benchmarks/common.py @@ -1,11 +1,11 @@ -import os +import itertools import json -import timeit +import os import pickle -import itertools +import timeit from abc import ABC, abstractmethod -from pathlib import Path from multiprocessing import cpu_count +from pathlib import Path import numpy as np @@ -23,7 +23,7 @@ def get_from_config(): n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS") if n_jobs_vals_env: - n_jobs_vals = eval(n_jobs_vals_env) + n_jobs_vals = json.loads(n_jobs_vals_env) else: n_jobs_vals = config["n_jobs_vals"] if not n_jobs_vals: diff --git a/asv_benchmarks/benchmarks/datasets.py b/asv_benchmarks/benchmarks/datasets.py index dbe0eac0b822c..bbf5029062448 100644 --- a/asv_benchmarks/benchmarks/datasets.py +++ b/asv_benchmarks/benchmarks/datasets.py @@ -1,21 +1,22 @@ +from pathlib import Path + import numpy as np import scipy.sparse as sp from joblib import Memory -from pathlib import Path -from sklearn.decomposition import TruncatedSVD from sklearn.datasets import ( - make_blobs, fetch_20newsgroups, + fetch_olivetti_faces, fetch_openml, load_digits, - make_regression, + make_blobs, make_classification, - fetch_olivetti_faces, + make_regression, ) -from sklearn.preprocessing import MaxAbsScaler, StandardScaler +from sklearn.decomposition import TruncatedSVD from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MaxAbsScaler, StandardScaler # memory location for caching datasets M = Memory(location=str(Path(__file__).resolve().parent / "cache")) @@ -59,9 +60,7 @@ def _20newsgroups_lowdim_dataset(n_components=100, ngrams=(1, 1), dtype=np.float @M.cache def _mnist_dataset(dtype=np.float32): - X, y = fetch_openml( - "mnist_784", version=1, return_X_y=True, as_frame=False, parser="pandas" - ) + X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False) X = X.astype(dtype, copy=False) X = MaxAbsScaler().fit_transform(X) diff --git a/asv_benchmarks/benchmarks/decomposition.py b/asv_benchmarks/benchmarks/decomposition.py index 02a7862caeb69..0a7bb7ad07f3e 100644 --- a/asv_benchmarks/benchmarks/decomposition.py +++ b/asv_benchmarks/benchmarks/decomposition.py @@ -1,8 +1,8 @@ from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning from .common import Benchmark, Estimator, Transformer -from .datasets import _olivetti_faces_dataset, _mnist_dataset -from .utils import make_pca_scorers, make_dict_learning_scorers +from 
.datasets import _mnist_dataset, _olivetti_faces_dataset +from .utils import make_dict_learning_scorers, make_pca_scorers class PCABenchmark(Transformer, Estimator, Benchmark): diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py index 8c5a28e3da90f..c336d1e5f8805 100644 --- a/asv_benchmarks/benchmarks/ensemble.py +++ b/asv_benchmarks/benchmarks/ensemble.py @@ -1,7 +1,7 @@ from sklearn.ensemble import ( - RandomForestClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, + RandomForestClassifier, ) from .common import Benchmark, Estimator, Predictor diff --git a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py index 663ceca61d063..24153895611df 100644 --- a/asv_benchmarks/benchmarks/linear_model.py +++ b/asv_benchmarks/benchmarks/linear_model.py @@ -1,9 +1,9 @@ from sklearn.linear_model import ( - LogisticRegression, - Ridge, ElasticNet, Lasso, LinearRegression, + LogisticRegression, + Ridge, SGDRegressor, ) @@ -52,7 +52,6 @@ def make_estimator(self, params): estimator = LogisticRegression( solver=solver, penalty=penalty, - multi_class="multinomial", tol=0.01, n_jobs=n_jobs, random_state=0, @@ -164,7 +163,11 @@ def make_data(self, params): return data def make_estimator(self, params): - estimator = SGDRegressor(max_iter=1000, tol=1e-16, random_state=0) + (representation,) = params + + max_iter = 60 if representation == "dense" else 300 + + estimator = SGDRegressor(max_iter=max_iter, tol=None, random_state=0) return estimator diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3f6b96dff9f60..9b0e8c2259f19 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -33,18 +33,13 @@ jobs: inputs: versionSpec: '3.9' - bash: | + source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest flake8 mypy==0.961 black==22.3.0 + pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint displayName: Install linters - bash: | - black --check --diff . - displayName: Run black - - bash: | - ./build_tools/azure/linting.sh - displayName: Run linting - - bash: | - mypy sklearn/ - displayName: Run mypy + ./build_tools/linting.sh + displayName: Run linters - template: build_tools/azure/posix.yml parameters: @@ -64,20 +59,16 @@ jobs: pylatest_pip_scipy_dev: DISTRIB: 'conda-pip-scipy-dev' LOCK_FILE: './build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock' - CHECK_WARNINGS: 'true' + SKLEARN_WARNINGS_AS_ERRORS: '1' CHECK_PYTEST_SOFT_DEPENDENCY: 'true' - TEST_DOCSTRINGS: 'true' - # Tests that require large downloads over the networks are skipped in CI. - # Here we make sure, that they are still run on a regular basis. - SKLEARN_SKIP_NETWORK_TESTS: '0' -- template: build_tools/azure/posix.yml +- template: build_tools/azure/posix-docker.yml # Experimental CPython branch without the Global Interpreter Lock: # https://github.com/colesbury/nogil/ # # The nogil build relies on a dedicated PyPI-style index to install patched # versions of NumPy, SciPy and Cython maintained by @colesbury and that - # include specifc fixes to make them run correctly without relying on the GIL. + # include specific fixes to make them run correctly without relying on the GIL. # # The goal of this CI entry is to make sure that we do not introduce any # dependency on the GIL in scikit-learn itself. 
An auxiliary goal is to early @@ -102,6 +93,7 @@ jobs: ) matrix: pylatest_pip_nogil: + DOCKER_CONTAINER: 'nogil/python' DISTRIB: 'pip-nogil' LOCK_FILE: './build_tools/azure/python_nogil_lock.txt' COVERAGE: 'false' @@ -126,6 +118,40 @@ jobs: DISTRIB: 'conda-pypy3' LOCK_FILE: './build_tools/azure/pypy3_linux-64_conda.lock' + +- job: Linux_Nightly_Pyodide + pool: + vmImage: ubuntu-22.04 + variables: + # Need to match Python version and Emscripten version for the correct + # Pyodide version. For example, for Pyodide version 0.25.1, see + # https://github.com/pyodide/pyodide/blob/0.25.1/Makefile.envs + PYODIDE_VERSION: '0.25.1' + EMSCRIPTEN_VERSION: '3.1.46' + PYTHON_VERSION: '3.11.3' + + dependsOn: [git_commit, linting] + condition: | + and( + succeeded(), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')), + or(eq(variables['Build.Reason'], 'Schedule'), + contains(dependencies['git_commit']['outputs']['commit.message'], '[pyodide]' + ) + ) + ) + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: $(PYTHON_VERSION) + addToPath: true + + - bash: bash build_tools/azure/install_pyodide.sh + displayName: Build Pyodide wheel + + - bash: bash build_tools/azure/test_script_pyodide.sh + displayName: Test Pyodide wheel + # Will run all the time regardless of linting outcome. - template: build_tools/azure/posix.yml parameters: @@ -142,11 +168,19 @@ jobs: DISTRIB: 'conda' LOCK_FILE: './build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock' COVERAGE: 'true' - SHOW_SHORT_SUMMARY: 'true' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '42' # default global random seed + # Tests that require large downloads over the network are skipped in CI. + # Here we make sure that they are still run on a regular basis. + ${{ if eq(variables['Build.Reason'], 'Schedule') }}: + SKLEARN_SKIP_NETWORK_TESTS: '0' # Check compilation with Ubuntu 22.04 LTS (Jammy Jellyfish) and scipy from conda-forge -- template: build_tools/azure/posix.yml +# By default the CI is sequential, where `Ubuntu_Jammy_Jellyfish` runs first and +# the other jobs are run only if `Ubuntu_Jammy_Jellyfish` succeeds. +# When "[azure parallel]" is in the commit message, `Ubuntu_Jammy_Jellyfish` will +# run in parallel with the rest of the jobs. On Azure, the job's name will be +# `Ubuntu_Jammy_Jellyfish_Parallel`.
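For context, markers such as "[ci skip]", "[pyodide]" and "[azure parallel]" are plain substrings matched against the latest commit message; the pipeline resolves them through Azure `condition:` expressions and `build_tools/azure/get_commit_message.py`. A minimal sketch of that kind of check, assuming a local git checkout (the helper name below is hypothetical):

```python
# Illustrative sketch only: detect CI control markers in the latest commit
# message. The real pipeline evaluates these markers in Azure conditions.
import subprocess


def commit_message_has_marker(marker: str) -> bool:
    # Read the full message of the most recent commit.
    message = subprocess.run(
        ["git", "log", "-1", "--pretty=%B"], capture_output=True, text=True
    ).stdout
    return marker in message


if commit_message_has_marker("[ci skip]"):
    print("CI jobs would be skipped for this commit.")
```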
+- template: build_tools/azure/posix-all-parallel.yml parameters: name: Ubuntu_Jammy_Jellyfish vmImage: ubuntu-22.04 @@ -156,63 +190,90 @@ jobs: succeeded(), not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) + commitMessage: dependencies['git_commit']['outputs']['commit.message'] matrix: - py38_conda_forge_openblas_ubuntu_2204: + pymin_conda_forge_openblas_ubuntu_2204: DISTRIB: 'conda' - LOCK_FILE: './build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock' + LOCK_FILE: './build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock' + SKLEARN_WARNINGS_AS_ERRORS: '1' COVERAGE: 'false' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '0' # non-default seed - template: build_tools/azure/posix.yml parameters: - name: Linux - vmImage: ubuntu-20.04 - dependsOn: [linting, git_commit] + name: Ubuntu_Atlas + vmImage: ubuntu-22.04 + dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] + # Runs when dependencies succeeded or skipped condition: | and( - succeeded(), + not(or(failed(), canceled())), not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: # Linux environment to test that scikit-learn can be built against - # versions of numpy, scipy with ATLAS that comes with Ubuntu Focal 20.04 - # i.e. numpy 1.17.4 and scipy 1.3.3 + # versions of numpy, scipy with ATLAS that comes with Ubuntu Jammy Jellyfish 22.04 + # i.e. numpy 1.21.5 and scipy 1.8.0 ubuntu_atlas: DISTRIB: 'ubuntu' LOCK_FILE: './build_tools/azure/ubuntu_atlas_lock.txt' COVERAGE: 'false' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '1' # non-default seed - # Linux + Python 3.8 build with OpenBLAS - py38_conda_defaults_openblas: + +- template: build_tools/azure/posix.yml + parameters: + name: Linux + vmImage: ubuntu-20.04 + dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] + # Runs when dependencies succeeded or skipped + condition: | + and( + not(or(failed(), canceled())), + not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) + ) + matrix: + # Linux + Python 3.9 build with OpenBLAS and without pandas + pymin_conda_defaults_openblas: DISTRIB: 'conda' - LOCK_FILE: './build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock' + LOCK_FILE: './build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock' + # Enable debug Cython directives to capture IndexError exceptions in + # combination with the -Werror::pytest.PytestUnraisableExceptionWarning + # flag for pytest. + # https://github.com/scikit-learn/scikit-learn/pull/24438 SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: '1' SKLEARN_RUN_FLOAT32_TESTS: '1' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '2' # non-default seed + BUILD_WITH_SETUPTOOLS: 'true' # Linux environment to test the latest available dependencies. # It runs tests requiring lightgbm, pandas and PyAMG. pylatest_pip_openblas_pandas: DISTRIB: 'conda-pip-latest' LOCK_FILE: './build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock' CHECK_PYTEST_SOFT_DEPENDENCY: 'true' - TEST_DOCSTRINGS: 'true' - CHECK_WARNINGS: 'true' + SKLEARN_WARNINGS_AS_ERRORS: '1' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '3' # non-default seed + # disable pytest-xdist to have 1 job where OpenMP and BLAS are not single + # threaded because by default the tests configuration (sklearn/conftest.py) + # makes sure that they are single threaded in each xdist subprocess. 
+ PYTEST_XDIST_VERSION: 'none' + PIP_BUILD_ISOLATION: 'true' - template: build_tools/azure/posix-docker.yml parameters: name: Linux_Docker vmImage: ubuntu-20.04 - dependsOn: [linting, git_commit] + dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] + # Runs when dependencies succeeded or skipped condition: | and( - succeeded(), + not(or(failed(), canceled())), not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: debian_atlas_32bit: DOCKER_CONTAINER: 'i386/debian:11.2' DISTRIB: 'debian-32' + COVERAGE: "true" LOCK_FILE: './build_tools/azure/debian_atlas_32bit_lock.txt' # disable pytest xdist due to unknown bug with 32-bit container PYTEST_XDIST_VERSION: 'none' @@ -222,10 +283,11 @@ jobs: parameters: name: macOS vmImage: macOS-11 - dependsOn: [linting, git_commit] + dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] + # Runs when dependencies succeeded or skipped condition: | and( - succeeded(), + not(or(failed(), canceled())), not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: @@ -244,17 +306,27 @@ jobs: parameters: name: Windows vmImage: windows-latest - dependsOn: [linting, git_commit] + dependsOn: [linting, git_commit, Ubuntu_Jammy_Jellyfish] + # Runs when dependencies succeeded or skipped condition: | and( - succeeded(), + not(or(failed(), canceled())), not(contains(dependencies['git_commit']['outputs']['commit.message'], '[ci skip]')) ) matrix: - py38_conda_forge_mkl: + pymin_conda_forge_mkl: DISTRIB: 'conda' - LOCK_FILE: ./build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock - CHECK_WARNINGS: 'true' - COVERAGE: 'true' + LOCK_FILE: ./build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock + SKLEARN_WARNINGS_AS_ERRORS: '1' + # The Azure Windows runner is typically much slower than other CI + # runners due to the lack of compiler cache. Running the tests with + # coverage enabled makes them run even slower. Since very few parts of + # code should have Windows-specific code branches, it should be enough + # to restrict the code coverage collection to the non-Windows runners. + COVERAGE: 'false' + # Enable debug Cython directives to capture IndexError exceptions in + # combination with the -Werror::pytest.PytestUnraisableExceptionWarning + # flag for pytest.
+ # https://github.com/scikit-learn/scikit-learn/pull/24438 SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: '1' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '7' # non-default seed diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py index cf38bc73a38ec..44a117f1ad42d 100644 --- a/benchmarks/bench_20newsgroups.py +++ b/benchmarks/bench_20newsgroups.py @@ -1,18 +1,19 @@ -from time import time import argparse -import numpy as np +from time import time -from sklearn.dummy import DummyClassifier +import numpy as np from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.metrics import accuracy_score -from sklearn.utils.validation import check_array - -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import AdaBoostClassifier +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ( + AdaBoostClassifier, + ExtraTreesClassifier, + RandomForestClassifier, +) from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score from sklearn.naive_bayes import MultinomialNB +from sklearn.utils.validation import check_array ESTIMATORS = { "dummy": DummyClassifier(), @@ -20,7 +21,7 @@ "extra_trees": ExtraTreesClassifier(max_features="sqrt", min_samples_split=10), "logistic_regression": LogisticRegression(), "naive_bayes": MultinomialNB(), - "adaboost": AdaBoostClassifier(n_estimators=10), + "adaboost": AdaBoostClassifier(n_estimators=10, algorithm="SAMME"), } @@ -28,7 +29,6 @@ # Data if __name__ == "__main__": - parser = argparse.ArgumentParser() parser.add_argument( "-e", "--estimators", nargs="+", required=True, choices=ESTIMATORS @@ -47,7 +47,7 @@ print(f"X_train.shape = {X_train.shape}") print(f"X_train.format = {X_train.format}") print(f"X_train.dtype = {X_train.dtype}") - print(f"X_train density = {X_train.nnz / np.product(X_train.shape)}") + print(f"X_train density = {X_train.nnz / np.prod(X_train.shape)}") print(f"y_train {y_train.shape}") print(f"X_test {X_test.shape}") print(f"X_test.format = {X_test.format}") diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py index 8a13a2d9806c6..5b8cdd588c8ee 100644 --- a/benchmarks/bench_covertype.py +++ b/benchmarks/bench_covertype.py @@ -45,20 +45,24 @@ # Arnaud Joly # License: BSD 3 clause +import argparse import os from time import time -import argparse + import numpy as np from joblib import Memory from sklearn.datasets import fetch_covtype, get_data_home -from sklearn.svm import LinearSVC -from sklearn.linear_model import SGDClassifier, LogisticRegression +from sklearn.ensemble import ( + ExtraTreesClassifier, + GradientBoostingClassifier, + RandomForestClassifier, +) +from sklearn.linear_model import LogisticRegression, SGDClassifier +from sklearn.metrics import zero_one_loss from sklearn.naive_bayes import GaussianNB +from sklearn.svm import LinearSVC from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.metrics import zero_one_loss from sklearn.utils import check_array # Memoize the data extraction and memory map the resulting diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py index 98fa17b99f47a..b9d9efbdea4f1 100644 --- a/benchmarks/bench_feature_expansions.py +++ b/benchmarks/bench_feature_expansions.py @@ -1,8 +1,10 @@ +from time import time + import matplotlib.pyplot as plt import numpy as np 
import scipy.sparse as sparse + from sklearn.preprocessing import PolynomialFeatures -from time import time degree = 2 trials = 3 @@ -35,7 +37,6 @@ fig, axes = plt.subplots(nrows=len(densities), ncols=1, figsize=(8, 10)) for density, ax in zip(densities, axes): - ax.plot( dimensionalities, csr_times[density] / trials, diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py index 06ca4d1276e1c..84cf31858afa7 100644 --- a/benchmarks/bench_glm.py +++ b/benchmarks/bench_glm.py @@ -4,13 +4,14 @@ Data comes from a random square matrix. """ + from datetime import datetime + import numpy as np -from sklearn import linear_model +from sklearn import linear_model if __name__ == "__main__": - import matplotlib.pyplot as plt n_iter = 40 @@ -22,7 +23,6 @@ dimensions = 500 * np.arange(1, n_iter + 1) for i in range(n_iter): - print("Iteration %s of %s" % (i, n_iter)) n_samples, n_features = 10 * i + 3, 10 * i + 3 diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py index 8a0a0545bb627..1aaad99c10587 100644 --- a/benchmarks/bench_glmnet.py +++ b/benchmarks/bench_glmnet.py @@ -16,9 +16,12 @@ In both cases, only 10% of the features are informative. """ -import numpy as np + import gc from time import time + +import numpy as np + from sklearn.datasets import make_regression alpha = 0.1 @@ -45,11 +48,11 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef): if __name__ == "__main__": - from glmnet.elastic_net import Lasso as GlmnetLasso - from sklearn.linear_model import Lasso as ScikitLasso - # Delayed import of matplotlib.pyplot import matplotlib.pyplot as plt + from glmnet.elastic_net import Lasso as GlmnetLasso + + from sklearn.linear_model import Lasso as ScikitLasso scikit_results = [] glmnet_results = [] diff --git a/benchmarks/bench_hist_gradient_boosting.py b/benchmarks/bench_hist_gradient_boosting.py index 163e21f98ed0d..c1dfffabe71c2 100644 --- a/benchmarks/bench_hist_gradient_boosting.py +++ b/benchmarks/bench_hist_gradient_boosting.py @@ -1,15 +1,16 @@ -from time import time import argparse +from time import time import matplotlib.pyplot as plt import numpy as np -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression -from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py index 1b5905b1cf4e8..97c762e8e9230 100644 --- a/benchmarks/bench_hist_gradient_boosting_adult.py +++ b/benchmarks/bench_hist_gradient_boosting_adult.py @@ -4,15 +4,14 @@ import numpy as np import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.compose import make_column_transformer, make_column_selector +from sklearn.compose import make_column_selector, make_column_transformer from sklearn.datasets import fetch_openml -from sklearn.metrics import accuracy_score, roc_auc_score from sklearn.ensemble import 
HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.metrics import accuracy_score, roc_auc_score +from sklearn.model_selection import train_test_split from sklearn.preprocessing import OrdinalEncoder - parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) parser.add_argument("--n-trees", type=int, default=100) @@ -50,7 +49,7 @@ def predict(est, data_test, target_test): print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}") -data = fetch_openml(data_id=179, as_frame=True, parser="pandas") # adult dataset +data = fetch_openml(data_id=179, as_frame=True) # adult dataset X, y = data.data, data.target # Ordinal encode the categories to use the native support available in HGBDT diff --git a/benchmarks/bench_hist_gradient_boosting_categorical_only.py b/benchmarks/bench_hist_gradient_boosting_categorical_only.py index e8d215170f9c8..1085bbc49f4f8 100644 --- a/benchmarks/bench_hist_gradient_boosting_categorical_only.py +++ b/benchmarks/bench_hist_gradient_boosting_categorical_only.py @@ -1,11 +1,10 @@ import argparse from time import time -from sklearn.preprocessing import KBinsDiscretizer from sklearn.datasets import make_classification from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.preprocessing import KBinsDiscretizer parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py index d6ed3b8e9700f..20057c50dc810 100644 --- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py +++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py @@ -1,17 +1,17 @@ -from urllib.request import urlretrieve +import argparse import os from gzip import GzipFile from time import time -import argparse +from urllib.request import urlretrieve import numpy as np import pandas as pd from joblib import Memory -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, roc_auc_score + from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.metrics import accuracy_score, roc_auc_score +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) @@ -25,6 +25,7 @@ parser.add_argument("--no-predict", action="store_true", default=False) parser.add_argument("--cache-loc", type=str, default="/tmp") parser.add_argument("--no-interactions", type=bool, default=False) +parser.add_argument("--max-features", type=float, default=1.0) args = parser.parse_args() HERE = os.path.dirname(__file__) @@ -36,6 +37,7 @@ subsample = args.subsample lr = args.learning_rate max_bins = args.max_bins +max_features = args.max_features @m.cache @@ -104,6 +106,7 @@ def predict(est, data_test, target_test): random_state=0, verbose=1, interaction_cst=interaction_cst, + max_features=max_features, ) fit(est, data_train, target_train, "sklearn") predict(est, data_test, target_test) diff --git a/benchmarks/bench_hist_gradient_boosting_threading.py b/benchmarks/bench_hist_gradient_boosting_threading.py index 70787fd2eb479..9acf65bdbaf6a 100644 --- a/benchmarks/bench_hist_gradient_boosting_threading.py +++ 
b/benchmarks/bench_hist_gradient_boosting_threading.py @@ -1,18 +1,19 @@ -from time import time import argparse import os from pprint import pprint +from time import time import numpy as np from threadpoolctl import threadpool_limits + import sklearn -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) @@ -290,8 +291,8 @@ def one_run(n_threads, n_samples): if args.plot or args.plot_filename: - import matplotlib.pyplot as plt import matplotlib + import matplotlib.pyplot as plt fig, axs = plt.subplots(2, figsize=(12, 12)) diff --git a/benchmarks/bench_isolation_forest.py b/benchmarks/bench_isolation_forest.py index 968ecf20876ae..743911936dccc 100644 --- a/benchmarks/bench_isolation_forest.py +++ b/benchmarks/bench_isolation_forest.py @@ -17,12 +17,13 @@ """ from time import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml from sklearn.ensemble import IsolationForest -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml +from sklearn.metrics import auc, roc_curve from sklearn.preprocessing import LabelBinarizer from sklearn.utils import shuffle as sh @@ -52,7 +53,6 @@ def print_outlier_ratio(y): # Loop over all datasets for fitting and scoring the estimator: for dat in datasets: - # Loading and vectorizing the data: print("====== %s ======" % dat) print("--- Fetching data...") @@ -64,7 +64,7 @@ def print_outlier_ratio(y): y = dataset.target if dat == "shuttle": - dataset = fetch_openml("shuttle", as_frame=False, parser="pandas") + dataset = fetch_openml("shuttle", as_frame=False) X = dataset.data y = dataset.target.astype(np.int64) X, y = sh(X, y, random_state=random_state) diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py index 458a04a463303..556c452fa3323 100644 --- a/benchmarks/bench_isotonic.py +++ b/benchmarks/bench_isotonic.py @@ -10,13 +10,16 @@ This allows the scaling of the algorithm with the problem size to be visualized and understood. """ -import numpy as np + +import argparse import gc from datetime import datetime -from sklearn.isotonic import isotonic_regression -from scipy.special import expit + import matplotlib.pyplot as plt -import argparse +import numpy as np +from scipy.special import expit + +from sklearn.isotonic import isotonic_regression def generate_perturbed_logarithm_dataset(size): diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index e4eddf9cb745a..26789c173688f 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -35,17 +35,17 @@ You can also set `arpack_all=True` to activate arpack solver for large number of components (this takes more time). 
""" + # Authors: Sylvain MARIE, Schneider Electric import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) @@ -82,7 +82,6 @@ r_time = np.empty((len(n_compo_range), n_iter)) * np.nan # loop for j, n_components in enumerate(n_compo_range): - n_components = int(n_components) print("Performing kPCA with n_components = %i" % n_components) diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py index b6d82647012d5..cae74c6f442ff 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py @@ -37,17 +37,17 @@ Solvers comparison benchmark: time vs n_components", where this time the number of examples is fixed, and the desired number of components varies. """ + # Author: Sylvain MARIE, Schneider Electric import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) @@ -83,7 +83,6 @@ # loop for j, n_samples in enumerate(n_samples_range): - n_samples = int(n_samples) print("Performing kPCA with n_samples = %i" % n_samples) diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py index 9a893545fbb28..9bae570505a75 100644 --- a/benchmarks/bench_lasso.py +++ b/benchmarks/bench_lasso.py @@ -11,8 +11,10 @@ In both cases, only 10% of the features are informative. 
""" + import gc from time import time + import numpy as np from sklearn.datasets import make_regression @@ -59,9 +61,10 @@ def compute_bench(alpha, n_samples, n_features, precompute): if __name__ == "__main__": - from sklearn.linear_model import Lasso, LassoLars import matplotlib.pyplot as plt + from sklearn.linear_model import Lasso, LassoLars + alpha = 0.01 # regularization parameter n_features = 10 diff --git a/benchmarks/bench_lof.py b/benchmarks/bench_lof.py index 31057e2e4067b..2c9732fab901f 100644 --- a/benchmarks/bench_lof.py +++ b/benchmarks/bench_lof.py @@ -18,11 +18,13 @@ """ from time import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np + +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml +from sklearn.metrics import auc, roc_curve from sklearn.neighbors import LocalOutlierFactor -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml from sklearn.preprocessing import LabelBinarizer print(__doc__) @@ -44,7 +46,7 @@ y = dataset.target if dataset_name == "shuttle": - dataset = fetch_openml("shuttle", as_frame=False, parser="pandas") + dataset = fetch_openml("shuttle", as_frame=False) X = dataset.data y = dataset.target.astype(np.int64) # we remove data with label 4 diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py index c50bfc2e594d6..334e69ed5a30a 100644 --- a/benchmarks/bench_mnist.py +++ b/benchmarks/bench_mnist.py @@ -30,26 +30,24 @@ # Arnaud Joly # License: BSD 3 clause +import argparse import os from time import time -import argparse + import numpy as np from joblib import Memory -from sklearn.datasets import fetch_openml -from sklearn.datasets import get_data_home -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import fetch_openml, get_data_home from sklearn.dummy import DummyClassifier -from sklearn.kernel_approximation import Nystroem -from sklearn.kernel_approximation import RBFSampler +from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier +from sklearn.kernel_approximation import Nystroem, RBFSampler +from sklearn.linear_model import LogisticRegression from sklearn.metrics import zero_one_loss +from sklearn.neural_network import MLPClassifier from sklearn.pipeline import make_pipeline from sklearn.svm import LinearSVC from sklearn.tree import DecisionTreeClassifier from sklearn.utils import check_array -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier # Memoize the data extraction and memory map the resulting # train / test splits in readonly mode @@ -62,7 +60,7 @@ def load_data(dtype=np.float32, order="F"): ###################################################################### # Load dataset print("Loading dataset...") - data = fetch_openml("mnist_784", as_frame=True, parser="pandas") + data = fetch_openml("mnist_784", as_frame=True) X = check_array(data["data"], dtype=dtype, order=order) y = data["target"] @@ -223,7 +221,6 @@ def load_data(dtype=np.float32, order="F"): ) print("-" * 60) for name in sorted(args["classifiers"], key=error.get): - print( "{0: <23} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}".format( name, train_time[name], test_time[name], error[name] diff --git a/benchmarks/bench_multilabel_metrics.py b/benchmarks/bench_multilabel_metrics.py index 2a87b388e91a2..1b8449a24da51 100755 --- a/benchmarks/bench_multilabel_metrics.py +++ b/benchmarks/bench_multilabel_metrics.py @@ 
-3,26 +3,25 @@ A comparison of multilabel target formats and metrics over them """ -from timeit import timeit -from functools import partial -import itertools import argparse +import itertools import sys +from functools import partial +from timeit import timeit import matplotlib.pyplot as plt -import scipy.sparse as sp import numpy as np +import scipy.sparse as sp from sklearn.datasets import make_multilabel_classification from sklearn.metrics import ( - f1_score, accuracy_score, + f1_score, hamming_loss, jaccard_similarity_score, ) from sklearn.utils._testing import ignore_warnings - METRICS = { "f1": partial(f1_score, average="micro"), "f1-by-sample": partial(f1_score, average="samples"), diff --git a/benchmarks/bench_online_ocsvm.py b/benchmarks/bench_online_ocsvm.py index c7eaefe082948..9f92150e079dd 100644 --- a/benchmarks/bench_online_ocsvm.py +++ b/benchmarks/bench_online_ocsvm.py @@ -15,21 +15,20 @@ """ from time import time -import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import numpy as np from scipy.interpolate import interp1d -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype -from sklearn.preprocessing import LabelBinarizer, StandardScaler -from sklearn.pipeline import make_pipeline -from sklearn.utils import shuffle +from sklearn.datasets import fetch_covtype, fetch_kddcup99 from sklearn.kernel_approximation import Nystroem -from sklearn.svm import OneClassSVM from sklearn.linear_model import SGDOneClassSVM - -import matplotlib.pyplot as plt -import matplotlib +from sklearn.metrics import auc, roc_curve +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import LabelBinarizer, StandardScaler +from sklearn.svm import OneClassSVM +from sklearn.utils import shuffle font = {"weight": "normal", "size": 15} @@ -65,7 +64,6 @@ def print_outlier_ratio(y): results_online = np.empty((len(datasets), n_axis + 5)) for dat, dataset_name in enumerate(datasets): - print(dataset_name) # Loading datasets @@ -133,7 +131,6 @@ def print_outlier_ratio(y): gamma = 1 / n_features # OCSVM default parameter for random_state in random_states: - print("random state: %s" % random_state) X, y = shuffle(X, y, random_state=random_state) diff --git a/benchmarks/bench_pca_solvers.py b/benchmarks/bench_pca_solvers.py new file mode 100644 index 0000000000000..337af3a42e900 --- /dev/null +++ b/benchmarks/bench_pca_solvers.py @@ -0,0 +1,165 @@ +# %% +# +# This benchmark compares the speed of PCA solvers on datasets of different +# sizes in order to determine the best solver to select by default via the +# "auto" heuristic. +# +# Note: we do not control for the accuracy of the solvers: we assume that all +# solvers yield transformed data with similar explained variance. This +# assumption is generally true, except for the randomized solver that might +# require more power iterations. +# +# We generate synthetic data with dimensions that are useful to plot: +# - time vs n_samples for a fixed n_features and, +# - time vs n_features for a fixed n_samples.
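The full benchmark below sweeps a grid of data shapes and reports median timings per solver; as a minimal standalone illustration of the kind of measurement it automates (the shape, `n_components` and solver list here are arbitrary choices, not taken from the patch):

```python
# Minimal sketch: time PCA.fit for a few svd_solver values on one synthetic
# matrix. bench_pca_solvers.py repeats this over many shapes and plots medians.
from time import perf_counter

import numpy as np
from sklearn.decomposition import PCA

X = np.random.default_rng(0).normal(size=(10_000, 100))
for solver in ("full", "randomized", "auto"):
    tic = perf_counter()
    PCA(n_components=2, svd_solver=solver, random_state=0).fit(X)
    print(f"{solver}: {perf_counter() - tic:.3f}s")
```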
+import itertools +from math import log10 +from time import perf_counter + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from sklearn import config_context +from sklearn.decomposition import PCA + +REF_DIMS = [100, 1000, 10_000] +data_shapes = [] +for ref_dim in REF_DIMS: + data_shapes.extend([(ref_dim, 10**i) for i in range(1, 8 - int(log10(ref_dim)))]) + data_shapes.extend( + [(ref_dim, 3 * 10**i) for i in range(1, 8 - int(log10(ref_dim)))] + ) + data_shapes.extend([(10**i, ref_dim) for i in range(1, 8 - int(log10(ref_dim)))]) + data_shapes.extend( + [(3 * 10**i, ref_dim) for i in range(1, 8 - int(log10(ref_dim)))] + ) + +# Remove duplicates: +data_shapes = sorted(set(data_shapes)) + +print("Generating test datasets...") +rng = np.random.default_rng(0) +datasets = [rng.normal(size=shape) for shape in data_shapes] + + +# %% +def measure_one(data, n_components, solver, method_name="fit"): + print( + f"Benchmarking {solver=!r}, {n_components=}, {method_name=!r} on data with" + f" shape {data.shape}" + ) + pca = PCA(n_components=n_components, svd_solver=solver, random_state=0) + timings = [] + elapsed = 0 + method = getattr(pca, method_name) + with config_context(assume_finite=True): + while elapsed < 0.5: + tic = perf_counter() + method(data) + duration = perf_counter() - tic + timings.append(duration) + elapsed += duration + return np.median(timings) + + +SOLVERS = ["full", "covariance_eigh", "arpack", "randomized", "auto"] +measurements = [] +for data, n_components, method_name in itertools.product( + datasets, [2, 50], ["fit", "fit_transform"] +): + if n_components >= min(data.shape): + continue + for solver in SOLVERS: + if solver == "covariance_eigh" and data.shape[1] > 5000: + # Too much memory and too slow. + continue + if solver in ["arpack", "full"] and log10(data.size) > 7: + # Too slow, in particular for the full solver. 
+ continue + time = measure_one(data, n_components, solver, method_name=method_name) + measurements.append( + { + "n_components": n_components, + "n_samples": data.shape[0], + "n_features": data.shape[1], + "time": time, + "solver": solver, + "method_name": method_name, + } + ) +measurements = pd.DataFrame(measurements) +measurements.to_csv("bench_pca_solvers.csv", index=False) + +# %% +all_method_names = measurements["method_name"].unique() +all_n_components = measurements["n_components"].unique() + +for method_name in all_method_names: + fig, axes = plt.subplots( + figsize=(16, 16), + nrows=len(REF_DIMS), + ncols=len(all_n_components), + sharey=True, + constrained_layout=True, + ) + fig.suptitle(f"Benchmarks for PCA.{method_name}, varying n_samples", fontsize=16) + + for row_idx, ref_dim in enumerate(REF_DIMS): + for n_components, ax in zip(all_n_components, axes[row_idx]): + for solver in SOLVERS: + if solver == "auto": + style_kwargs = dict(linewidth=2, color="black", style="--") + else: + style_kwargs = dict(style="o-") + ax.set( + title=f"n_components={n_components}, n_features={ref_dim}", + ylabel="time (s)", + ) + measurements.query( + "n_components == @n_components and n_features == @ref_dim" + " and solver == @solver and method_name == @method_name" + ).plot.line( + x="n_samples", + y="time", + label=solver, + logx=True, + logy=True, + ax=ax, + **style_kwargs, + ) +# %% +for method_name in all_method_names: + fig, axes = plt.subplots( + figsize=(16, 16), + nrows=len(REF_DIMS), + ncols=len(all_n_components), + sharey=True, + ) + fig.suptitle(f"Benchmarks for PCA.{method_name}, varying n_features", fontsize=16) + + for row_idx, ref_dim in enumerate(REF_DIMS): + for n_components, ax in zip(all_n_components, axes[row_idx]): + for solver in SOLVERS: + if solver == "auto": + style_kwargs = dict(linewidth=2, color="black", style="--") + else: + style_kwargs = dict(style="o-") + ax.set( + title=f"n_components={n_components}, n_samples={ref_dim}", + ylabel="time (s)", + ) + measurements.query( + "n_components == @n_components and n_samples == @ref_dim " + " and solver == @solver and method_name == @method_name" + ).plot.line( + x="n_features", + y="time", + label=solver, + logx=True, + logy=True, + ax=ax, + **style_kwargs, + ) + +# %% diff --git a/benchmarks/bench_plot_fastkmeans.py b/benchmarks/bench_plot_fastkmeans.py index edbf9412deca2..1d420d1dabe5d 100644 --- a/benchmarks/bench_plot_fastkmeans.py +++ b/benchmarks/bench_plot_fastkmeans.py @@ -8,7 +8,6 @@ def compute_bench(samples_range, features_range): - it = 0 results = defaultdict(lambda: []) chunk = 100 diff --git a/benchmarks/bench_plot_hierarchical.py b/benchmarks/bench_plot_hierarchical.py index 856203259e8ee..861a0ea0b5296 100644 --- a/benchmarks/bench_plot_hierarchical.py +++ b/benchmarks/bench_plot_hierarchical.py @@ -8,7 +8,6 @@ def compute_bench(samples_range, features_range): - it = 0 results = defaultdict(lambda: []) diff --git a/benchmarks/bench_plot_incremental_pca.py b/benchmarks/bench_plot_incremental_pca.py index 0f42e4b630f1d..49b87c8c7060a 100644 --- a/benchmarks/bench_plot_incremental_pca.py +++ b/benchmarks/bench_plot_incremental_pca.py @@ -7,13 +7,15 @@ """ -import numpy as np import gc -from time import time from collections import defaultdict +from time import time + import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import fetch_lfw_people -from sklearn.decomposition import IncrementalPCA, PCA +from sklearn.decomposition import PCA, IncrementalPCA def plot_results(X, y, label): diff 
--git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index 4373c70223976..3b46e447401cb 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -2,20 +2,19 @@ The input data is mostly low rank but is a fat infinite tail. """ -from collections import defaultdict + import gc import sys +from collections import defaultdict from time import time import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram -from sklearn.linear_model import lasso_path from sklearn.datasets import make_regression +from sklearn.linear_model import lars_path, lars_path_gram, lasso_path def compute_bench(samples_range, features_range): - it = 0 results = defaultdict(lambda: []) diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py index c6e5541eda6f3..2cedb19fb23c4 100644 --- a/benchmarks/bench_plot_neighbors.py +++ b/benchmarks/bench_plot_neighbors.py @@ -1,13 +1,14 @@ """ Plot the scaling of the nearest neighbors algorithms with k, D, and N """ + from time import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib import ticker -from sklearn import neighbors, datasets +from sklearn import datasets, neighbors def get_data(N, D, dataset="dense"): diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 78d6ad875cc34..f05ede117191b 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -1,33 +1,31 @@ """ Benchmarks of Non-Negative Matrix Factorization """ + # Authors: Tom Dupre la Tour (benchmark) # Chih-Jen Linn (original projected gradient NMF implementation) # Anthony Di Franco (projected gradient, Python and NumPy port) # License: BSD 3 clause -from time import time +import numbers import sys import warnings -import numbers +from time import time -import numpy as np import matplotlib.pyplot as plt -from joblib import Memory +import numpy as np import pandas +from joblib import Memory -from sklearn.utils._testing import ignore_warnings -from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.decomposition import NMF -from sklearn.decomposition._nmf import _initialize_nmf -from sklearn.decomposition._nmf import _beta_divergence -from sklearn.decomposition._nmf import _check_init +from sklearn.decomposition._nmf import _beta_divergence, _check_init, _initialize_nmf from sklearn.exceptions import ConvergenceWarning -from sklearn.utils.extmath import safe_sparse_dot, squared_norm +from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.utils import check_array +from sklearn.utils._testing import ignore_warnings +from sklearn.utils.extmath import safe_sparse_dot, squared_norm from sklearn.utils.validation import check_is_fitted, check_non_negative - mem = Memory(cachedir=".", verbose=0) ################### @@ -41,7 +39,7 @@ def _norm(x): """Dot product-based Euclidean norm implementation - See: http://fseoane.net/blog/2011/computing-the-vector-norm/ + See: https://fa.bianp.net/blog/2011/computing-the-vector-norm/ """ return np.sqrt(squared_norm(x)) @@ -261,8 +259,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0: raise ValueError( "Maximum number of iterations must be a positive " - "integer; got (max_iter=%r)" - % self.max_iter + "integer; got (max_iter=%r)" % self.max_iter ) if not isinstance(self.tol, numbers.Number) or self.tol < 0: raise ValueError( @@ -308,8 +305,7 @@ def 
_fit_transform(self, X, y=None, W=None, H=None, update_H=True): if n_iter == self.max_iter and self.tol > 0: warnings.warn( "Maximum number of iteration %d reached. Increase it" - " to improve convergence." - % self.max_iter, + " to improve convergence." % self.max_iter, ConvergenceWarning, ) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index 4325e1fb17f3c..8a4bc9b1a34fe 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -3,18 +3,18 @@ The input data is mostly low rank but is a fat infinite tail. """ + import gc import sys from time import time import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp from sklearn.datasets import make_sparse_coded_signal +from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp def compute_bench(samples_range, features_range): - it = 0 results = dict() @@ -27,7 +27,7 @@ def compute_bench(samples_range, features_range): for i_s, n_samples in enumerate(samples_range): for i_f, n_features in enumerate(features_range): it += 1 - n_informative = n_features / 10 + n_informative = n_features // 10 print("====================") print("Iteration %03d of %03d" % (it, max_it)) print("====================") @@ -46,12 +46,11 @@ def compute_bench(samples_range, features_range): "n_features": n_samples, "n_nonzero_coefs": n_informative, "random_state": 0, - "data_transposed": True, } print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) y, X, _ = make_sparse_coded_signal(**dataset_kwargs) - X = np.asfortranarray(X) + X = np.asfortranarray(X.T) gc.collect() print("benchmarking lars_path (with Gram):", end="") diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py index a41e3fab20589..ca12972f9be6c 100644 --- a/benchmarks/bench_plot_parallel_pairwise.py +++ b/benchmarks/bench_plot_parallel_pairwise.py @@ -4,9 +4,8 @@ import matplotlib.pyplot as plt +from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels from sklearn.utils import check_random_state -from sklearn.metrics.pairwise import pairwise_distances -from sklearn.metrics.pairwise import pairwise_kernels def plot(func): diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py index b21589263a49f..a80455e21c255 100644 --- a/benchmarks/bench_plot_polynomial_kernel_approximation.py +++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py @@ -30,33 +30,34 @@ [1] Pham, N., & Pagh, R. (2013, August). Fast and scalable polynomial kernels via explicit feature maps. In Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 239-247) -(http://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf) +(https://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf) [2] Charikar, M., Chen, K., & Farach-Colton, M. (2002, July). Finding frequent items in data streams. In International Colloquium on Automata, Languages, and Programming (pp. 693-703). Springer, Berlin, Heidelberg. 
-(http://www.vldb.org/pvldb/1/1454225.pdf) +(https://people.cs.rutgers.edu/~farach/pubs/FrequentStream.pdf) """ + # Author: Daniel Lopez-Sanchez # License: BSD 3 clause # Load data manipulation functions -from sklearn.datasets import load_digits -from sklearn.model_selection import train_test_split +# Will use this for timing results +from time import time # Some common libraries import matplotlib.pyplot as plt import numpy as np -# Will use this for timing results -from time import time - -# Import SVM classifiers and feature map approximation algorithms -from sklearn.svm import LinearSVC, SVC +from sklearn.datasets import load_digits from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch +from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline +# Import SVM classifiers and feature map approximation algorithms +from sklearn.svm import SVC, LinearSVC + # Split data in train and test sets X, y = load_digits()["data"], load_digits()["target"] X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7) diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py index ecc1bbb92ce61..6bb5618b3633f 100644 --- a/benchmarks/bench_plot_randomized_svd.py +++ b/benchmarks/bench_plot_randomized_svd.py @@ -65,28 +65,29 @@ # Author: Giorgio Patrini -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - import gc +import os.path import pickle -from time import time from collections import defaultdict -import os.path +from time import time + +import matplotlib.pyplot as plt +import numpy as np +import scipy as sp -from sklearn.utils._arpack import _init_arpack_v0 -from sklearn.utils import gen_batches -from sklearn.utils.validation import check_random_state -from sklearn.utils.extmath import randomized_svd -from sklearn.datasets import make_low_rank_matrix, make_sparse_uncorrelated from sklearn.datasets import ( - fetch_lfw_people, - fetch_openml, fetch_20newsgroups_vectorized, + fetch_lfw_people, fetch_olivetti_faces, + fetch_openml, fetch_rcv1, + make_low_rank_matrix, + make_sparse_uncorrelated, ) +from sklearn.utils import gen_batches +from sklearn.utils._arpack import _init_arpack_v0 +from sklearn.utils.extmath import randomized_svd +from sklearn.utils.validation import check_random_state try: import fbpca @@ -191,7 +192,7 @@ def get_data(dataset_name): del row del col else: - X = fetch_openml(dataset_name, parser="auto").data + X = fetch_openml(dataset_name).data return X @@ -342,7 +343,6 @@ def scalable_frobenius_norm_discrepancy(X, U, s, V): def bench_a(X, dataset_name, power_iter, n_oversamples, n_comps): - all_time = defaultdict(list) if enable_spectral_norm: all_spectral = defaultdict(list) @@ -398,7 +398,6 @@ def bench_a(X, dataset_name, power_iter, n_oversamples, n_comps): def bench_b(power_list): - n_samples, n_features = 1000, 10000 data_params = { "n_samples": n_samples, diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index 52d22f6a9c8a0..ed99d1c44e2fd 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -2,18 +2,19 @@ The data is mostly low rank but is a fat infinite tail. 
""" + import gc -from time import time -import numpy as np from collections import defaultdict +from time import time +import numpy as np from scipy.linalg import svd -from sklearn.utils.extmath import randomized_svd + from sklearn.datasets import make_low_rank_matrix +from sklearn.utils.extmath import randomized_svd def compute_bench(samples_range, features_range, n_iter=3, rank=50): - it = 0 results = defaultdict(lambda: []) diff --git a/benchmarks/bench_plot_ward.py b/benchmarks/bench_plot_ward.py index 696e833eede20..fe5cee201dff4 100644 --- a/benchmarks/bench_plot_ward.py +++ b/benchmarks/bench_plot_ward.py @@ -4,9 +4,9 @@ import time +import matplotlib.pyplot as plt import numpy as np from scipy.cluster import hierarchy -import matplotlib.pyplot as plt from sklearn.cluster import AgglomerativeClustering diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index 89a4550944f3f..6551de690994b 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -6,19 +6,20 @@ Benchmarks for random projections. """ + +import collections import gc -import sys import optparse +import sys from datetime import datetime -import collections import numpy as np import scipy.sparse as sp from sklearn import clone from sklearn.random_projection import ( - SparseRandomProjection, GaussianRandomProjection, + SparseRandomProjection, johnson_lindenstrauss_min_dim, ) diff --git a/benchmarks/bench_rcv1_logreg_convergence.py b/benchmarks/bench_rcv1_logreg_convergence.py index e8fce1c414abf..166c6c2f5f9d1 100644 --- a/benchmarks/bench_rcv1_logreg_convergence.py +++ b/benchmarks/bench_rcv1_logreg_convergence.py @@ -3,14 +3,15 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt -from joblib import Memory -import numpy as np import gc import time -from sklearn.linear_model import LogisticRegression, SGDClassifier +import matplotlib.pyplot as plt +import numpy as np +from joblib import Memory + from sklearn.datasets import fetch_rcv1 +from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.linear_model._sag import get_auto_step_size try: @@ -240,7 +241,7 @@ def get_max_squared_sum(X): SGDClassifier( alpha=1.0 / C / n_samples, penalty="l2", - loss="log", + loss="log_loss", fit_intercept=fit_intercept, verbose=0, ), diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py index 581f7e3881e9e..97d4ba7b4b75b 100644 --- a/benchmarks/bench_saga.py +++ b/benchmarks/bench_saga.py @@ -3,26 +3,27 @@ Benchmarks of sklearn SAGA vs lightning SAGA vs Liblinear. Shows the gain in using multinomial logistic regression in term of learning time. 
""" + import json -import time import os +import time -from joblib import Parallel -from sklearn.utils.fixes import delayed import matplotlib.pyplot as plt import numpy as np from sklearn.datasets import ( + fetch_20newsgroups_vectorized, fetch_rcv1, - load_iris, load_digits, - fetch_20newsgroups_vectorized, + load_iris, ) from sklearn.linear_model import LogisticRegression from sklearn.metrics import log_loss from sklearn.model_selection import train_test_split +from sklearn.multiclass import OneVsRestClassifier from sklearn.preprocessing import LabelBinarizer, LabelEncoder from sklearn.utils.extmath import safe_sparse_dot, softmax +from sklearn.utils.parallel import Parallel, delayed def fit_single( @@ -95,7 +96,6 @@ def fit_single( else: lr = LogisticRegression( solver=solver, - multi_class=multi_class, C=C, penalty=penalty, fit_intercept=False, @@ -103,6 +103,8 @@ def fit_single( max_iter=this_max_iter, random_state=42, ) + if multi_class == "ovr": + lr = OneVsRestClassifier(lr) # Makes cpu cache even for all fit calls X_train.max() @@ -118,10 +120,12 @@ def fit_single( except NotImplementedError: # Lightning predict_proba is not implemented for n_classes > 2 y_pred = _predict_proba(lr, X) + if isinstance(lr, OneVsRestClassifier): + coef = np.concatenate([est.coef_ for est in lr.estimators_]) + else: + coef = lr.coef_ score = log_loss(y, y_pred, normalize=False) / n_samples - score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum( - np.abs(lr.coef_) - ) + score += 0.5 * alpha * np.sum(coef**2) + beta * np.sum(np.abs(coef)) scores.append(score) train_score, test_score = tuple(scores) @@ -135,6 +139,7 @@ def fit_single( def _predict_proba(lr, X): + """Predict proba for lightning for n_classes >=3.""" pred = safe_sparse_dot(X, lr.coef_.T) if hasattr(lr, "intercept_"): pred += lr.intercept_ diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 4f1041a6d1022..39cf1a11ffed6 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -2,15 +2,16 @@ Benchmarks for sampling without replacement of integer. 
""" + import gc -import sys +import operator import optparse +import random +import sys from datetime import datetime -import operator import matplotlib.pyplot as plt import numpy as np -import random from sklearn.utils.random import sample_without_replacement @@ -105,47 +106,53 @@ def bench_sample(sampling, n_population, n_samples): ########################################################################### # Set Python core input - sampling_algorithm[ - "python-core-sample" - ] = lambda n_population, n_sample: random.sample(range(n_population), n_sample) + sampling_algorithm["python-core-sample"] = ( + lambda n_population, n_sample: random.sample(range(n_population), n_sample) + ) ########################################################################### # Set custom automatic method selection - sampling_algorithm[ - "custom-auto" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="auto", random_state=random_state + sampling_algorithm["custom-auto"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, n_samples, method="auto", random_state=random_state + ) ) ########################################################################### # Set custom tracking based method - sampling_algorithm[ - "custom-tracking-selection" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="tracking_selection", random_state=random_state + sampling_algorithm["custom-tracking-selection"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, + n_samples, + method="tracking_selection", + random_state=random_state, + ) ) ########################################################################### # Set custom reservoir based method - sampling_algorithm[ - "custom-reservoir-sampling" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="reservoir_sampling", random_state=random_state + sampling_algorithm["custom-reservoir-sampling"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, + n_samples, + method="reservoir_sampling", + random_state=random_state, + ) ) ########################################################################### # Set custom reservoir based method - sampling_algorithm[ - "custom-pool" - ] = lambda n_population, n_samples, random_state=None: sample_without_replacement( - n_population, n_samples, method="pool", random_state=random_state + sampling_algorithm["custom-pool"] = ( + lambda n_population, n_samples, random_state=None: sample_without_replacement( + n_population, n_samples, method="pool", random_state=random_state + ) ) ########################################################################### # Numpy permutation based - sampling_algorithm[ - "numpy-permutation" - ] = lambda n_population, n_sample: np.random.permutation(n_population)[:n_sample] + sampling_algorithm["numpy-permutation"] = ( + lambda n_population, n_sample: np.random.permutation(n_population)[:n_sample] + ) ########################################################################### # Remove unspecified algorithm @@ -208,7 +215,7 @@ def bench_sample(sampling, n_population, n_samples): print("") fig = plt.figure("scikit-learn sample w/o replacement benchmark results") - plt.title("n_population = %s, n_times = %s" % (opts.n_population, opts.n_times)) + fig.suptitle("n_population = %s, 
n_times = %s" % (opts.n_population, opts.n_times)) ax = fig.add_subplot(111) for name in sampling_algorithm: ax.plot(ratio, time[name], label=name) diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py index 47dd9e9fc758b..4b1b902795feb 100644 --- a/benchmarks/bench_sgd_regression.py +++ b/benchmarks/bench_sgd_regression.py @@ -1,16 +1,15 @@ # Author: Peter Prettenhofer # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt - import gc - from time import time -from sklearn.linear_model import Ridge, SGDRegressor, ElasticNet -from sklearn.metrics import mean_squared_error +import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_regression +from sklearn.linear_model import ElasticNet, Ridge, SGDRegressor +from sklearn.metrics import mean_squared_error """ Benchmark for SGD regression diff --git a/benchmarks/bench_sparsify.py b/benchmarks/bench_sparsify.py index f1aa482b8b732..1832ca40c6ddb 100644 --- a/benchmarks/bench_sparsify.py +++ b/benchmarks/bench_sparsify.py @@ -43,8 +43,9 @@ 60 300 381409 1271.4 97.1 clf.predict(X_test_sparse) """ -from scipy.sparse import csr_matrix import numpy as np +from scipy.sparse import csr_matrix + from sklearn.linear_model import SGDRegressor from sklearn.metrics import r2_score diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py index 4f40e87f74e14..2eab7071544f9 100644 --- a/benchmarks/bench_text_vectorizers.py +++ b/benchmarks/bench_text_vectorizers.py @@ -8,8 +8,9 @@ * psutil (optional, but recommended) """ -import timeit + import itertools +import timeit import numpy as np import pandas as pd @@ -18,8 +19,8 @@ from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import ( CountVectorizer, - TfidfVectorizer, HashingVectorizer, + TfidfVectorizer, ) n_repeat = 3 @@ -45,7 +46,6 @@ def f(): [CountVectorizer, TfidfVectorizer, HashingVectorizer], [("word", (1, 1)), ("word", (1, 2)), ("char", (4, 4)), ("char_wb", (4, 4))], ): - bench = {"vectorizer": Vectorizer.__name__} params = {"analyzer": analyzer, "ngram_range": ngram_range} bench.update(params) diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py index 1809cb7c5e9c0..c522bcb39e994 100644 --- a/benchmarks/bench_tree.py +++ b/benchmarks/bench_tree.py @@ -13,11 +13,13 @@ training set, classify a sample and plot the time taken as a function of the number of dimensions. 
""" -import numpy as np -import matplotlib.pyplot as plt + import gc from datetime import datetime +import matplotlib.pyplot as plt +import numpy as np + # to store the results scikit_classifier_results = [] scikit_regressor_results = [] @@ -60,7 +62,6 @@ def bench_scikit_tree_regressor(X, Y): if __name__ == "__main__": - print("============================================") print("Warning: this is going to take a looong time") print("============================================") diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index aa1a07a67ef44..813fffcf29141 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -7,18 +7,19 @@ # License: BSD 3 clause +import argparse +import json import os import os.path as op from time import time + import numpy as np -import json -import argparse from joblib import Memory from sklearn.datasets import fetch_openml +from sklearn.decomposition import PCA from sklearn.manifold import TSNE from sklearn.neighbors import NearestNeighbors -from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils import shuffle as _shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads @@ -35,7 +36,7 @@ def load_data(dtype=np.float32, order="C", shuffle=True, seed=0): """Load the data, then cache and memmap the train/test split""" print("Loading dataset...") - data = fetch_openml("mnist_784", as_frame=True, parser="pandas") + data = fetch_openml("mnist_784", as_frame=True) X = check_array(data["data"], dtype=dtype, order=order) y = data["target"] @@ -160,7 +161,6 @@ def bhtsne(X): methods.append(("lvdmaaten/bhtsne", bhtsne)) if args.profile: - try: from memory_profiler import profile except ImportError as e: diff --git a/benchmarks/plot_tsne_mnist.py b/benchmarks/plot_tsne_mnist.py index d32e3dd769d6a..fff71eed0a26c 100644 --- a/benchmarks/plot_tsne_mnist.py +++ b/benchmarks/plot_tsne_mnist.py @@ -1,9 +1,8 @@ -import matplotlib.pyplot as plt -import numpy as np -import os.path as op - import argparse +import os.path as op +import matplotlib.pyplot as plt +import numpy as np LOG_DIR = "mnist_tsne_output" diff --git a/build_tools/azure/combine_coverage_reports.sh b/build_tools/azure/combine_coverage_reports.sh new file mode 100755 index 0000000000000..c3b90fdd4fcdb --- /dev/null +++ b/build_tools/azure/combine_coverage_reports.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -e + +# Defines the show_installed_libraries and activate_environment functions. 
+source build_tools/shared.sh + +activate_environment + +# Combine all coverage files generated by subprocess workers +# such as pytest-xdist and joblib/loky: +pushd $TEST_DIR +coverage combine --append +coverage xml +popd + +# Copy the combined coverage file to the root of the repository: +cp $TEST_DIR/coverage.xml $BUILD_REPOSITORY_LOCALPATH diff --git a/build_tools/azure/debian_atlas_32bit_lock.txt b/build_tools/azure/debian_atlas_32bit_lock.txt index 0e2ff3ac6dbb8..7971e64b72560 100644 --- a/build_tools/azure/debian_atlas_32bit_lock.txt +++ b/build_tools/azure/debian_atlas_32bit_lock.txt @@ -1,28 +1,45 @@ # -# This file is autogenerated by pip-compile with python 3.9 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: # # pip-compile --output-file=build_tools/azure/debian_atlas_32bit_lock.txt build_tools/azure/debian_atlas_32bit_requirements.txt # -attrs==22.1.0 +attrs==23.2.0 # via pytest -cython==0.29.32 +coverage==7.5.1 + # via pytest-cov +cython==3.0.10 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -joblib==1.1.1 - # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -more-itertools==9.0.0 - # via pytest -packaging==21.3 +iniconfig==2.0.0 # via pytest -pluggy==0.13.1 +joblib==1.2.0 + # via -r build_tools/azure/debian_atlas_32bit_requirements.txt +meson==1.4.0 + # via meson-python +meson-python==0.16.0 + # via -r build_tools/azure/debian_atlas_32bit_requirements.txt +ninja==1.11.1.1 + # via -r build_tools/azure/debian_atlas_32bit_requirements.txt +packaging==24.0 + # via + # meson-python + # pyproject-metadata + # pytest +pluggy==1.5.0 # via pytest py==1.11.0 # via pytest -pyparsing==3.0.9 - # via packaging -pytest==5.3.1 +pyproject-metadata==0.8.0 + # via meson-python +pytest==7.1.2 + # via + # -r build_tools/azure/debian_atlas_32bit_requirements.txt + # pytest-cov +pytest-cov==2.9.0 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -threadpoolctl==2.2.0 +threadpoolctl==3.1.0 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt -wcwidth==0.2.5 - # via pytest +tomli==2.0.1 + # via + # meson-python + # pytest diff --git a/build_tools/azure/debian_atlas_32bit_requirements.txt b/build_tools/azure/debian_atlas_32bit_requirements.txt index 6ce3aa8615eb6..615193a71fc6b 100644 --- a/build_tools/azure/debian_atlas_32bit_requirements.txt +++ b/build_tools/azure/debian_atlas_32bit_requirements.txt @@ -1,7 +1,10 @@ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py -cython -joblib==1.1.1 # min -threadpoolctl==2.2.0 -pytest==5.3.1 # min +cython==3.0.10 # min +joblib==1.2.0 # min +threadpoolctl==3.1.0 +pytest==7.1.2 # min +pytest-cov==2.9.0 # min +ninja +meson-python diff --git a/build_tools/azure/get_commit_message.py b/build_tools/azure/get_commit_message.py index b6a4fc9d750e0..0b1246b8d2724 100644 --- a/build_tools/azure/get_commit_message.py +++ b/build_tools/azure/get_commit_message.py @@ -1,6 +1,6 @@ +import argparse import os import subprocess -import argparse def get_commit_message(): @@ -19,6 +19,19 @@ def get_commit_message(): else: commit_message = build_source_version_message + # Sanitize the commit message to avoid introducing a vulnerability: a PR + # submitter could include the "##vso" special marker in their commit + # message to attempt to obfuscate the injection of arbitrary commands in + # the Azure pipeline.
+ # + # This can be a problem if the PR reviewers do not pay close enough + # attention to the full commit message prior to clicking the merge button + # and as a result make the injected code run in a protected branch with + # elevated access to CI secrets. On a protected branch, Azure + # already sanitizes `BUILD_SOURCEVERSIONMESSAGE`, but the message + # is still sanitized here as a precaution. + commit_message = commit_message.replace("##vso", "..vso") + return commit_message diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh index 08bc126066c9d..3016361a6bfdc 100755 --- a/build_tools/azure/install.sh +++ b/build_tools/azure/install.sh @@ -29,7 +29,6 @@ setup_ccache() { pre_python_environment_install() { if [[ "$DISTRIB" == "ubuntu" ]]; then - sudo add-apt-repository --remove ppa:ubuntu-toolchain-r/test sudo apt-get update sudo apt-get install python3-scipy python3-matplotlib \ libatlas3-base libatlas-base-dev python3-virtualenv ccache @@ -44,27 +43,26 @@ pre_python_environment_install() { # need compilers apt-get -yq update apt-get -yq install build-essential + fi - elif [[ "$DISTRIB" == "pip-nogil" ]]; then - echo "deb-src http://archive.ubuntu.com/ubuntu/ focal main" | sudo tee -a /etc/apt/sources.list - sudo apt-get -yq update - sudo apt-get install -yq ccache - sudo apt-get build-dep -yq python3 python3-dev - setup_ccache # speed-up the build of CPython itself - # build Python nogil - PYTHON_NOGIL_CLONE_PATH=../nogil - git clone --depth 1 https://github.com/colesbury/nogil $PYTHON_NOGIL_CLONE_PATH - cd $PYTHON_NOGIL_CLONE_PATH - ./configure && make -j 2 - export PYTHON_NOGIL_PATH="${PYTHON_NOGIL_CLONE_PATH}/python" - cd $OLDPWD +} - fi +check_packages_dev_version() { + for package in $@; do + package_version=$(python -c "import $package; print($package.__version__)") + if ! [[ $package_version =~ "dev" ]]; then + echo "$package is not a development version: $package_version" + exit 1 + fi + done } python_environment_install_and_activate() { if [[ "$DISTRIB" == "conda"* ]]; then - conda update -n base conda -y + # Install/update conda with the libmamba solver because the legacy + # solver can be slow at installing a specific version of conda-lock.
+ conda install -n base conda conda-libmamba-solver -y + conda config --set solver libmamba conda install -c conda-forge "$(get_dep conda-lock min)" -y conda-lock install --name $VIRTUALENV $LOCK_FILE source activate $VIRTUALENV @@ -75,21 +73,30 @@ python_environment_install_and_activate() { pip install -r "${LOCK_FILE}" elif [[ "$DISTRIB" == "pip-nogil" ]]; then - ${PYTHON_NOGIL_PATH} -m venv $VIRTUALENV + python -m venv $VIRTUALENV source $VIRTUALENV/bin/activate pip install -r "${LOCK_FILE}" fi if [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then echo "Installing development dependency wheels" - dev_anaconda_url=https://pypi.anaconda.org/scipy-wheels-nightly/simple - pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url numpy pandas scipy - echo "Installing Cython from PyPI enabling pre-releases" - pip install --pre cython - echo "Installing joblib master" + dev_anaconda_url=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple + dev_packages="numpy scipy pandas" + pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url $dev_packages + + check_packages_dev_version $dev_packages + + echo "Installing Cython from latest sources" + pip install https://github.com/cython/cython/archive/master.zip + echo "Installing joblib from latest sources" pip install https://github.com/joblib/joblib/archive/master.zip - echo "Installing pillow master" + echo "Installing pillow from latest sources" pip install https://github.com/python-pillow/Pillow/archive/main.zip + + elif [[ "$DISTRIB" == "pip-nogil" ]]; then + apt-get -yq update + apt-get install -yq ccache + fi } @@ -105,6 +112,12 @@ scikit_learn_install() { # Without openmp, we use the system clang. Here we use /usr/bin/ar # instead because llvm-ar errors export AR=/usr/bin/ar + # Make sure omp.h is not present in the conda environment, so that + # using an unprotected "cimport openmp" will make this build fail. At + # the time of writing (2023-01-13), on OSX, blas (mkl or openblas) + # brings in openmp so that you end up having the omp.h include inside + # the conda environment. + find $CONDA_PREFIX -name omp.h -delete -print fi if [[ "$UNAMESTR" == "Linux" ]]; then @@ -113,19 +126,26 @@ scikit_learn_install() { export LDFLAGS="$LDFLAGS -Wl,--sysroot=/" fi - # TODO use a specific variable for this rather than using a particular build ... - if [[ "$DISTRIB" == "conda-pip-latest" ]]; then + if [[ "$BUILD_WITH_SETUPTOOLS" == "true" ]]; then + python setup.py develop + elif [[ "$PIP_BUILD_ISOLATION" == "true" ]]; then # Check that pip can automatically build scikit-learn with the build # dependencies specified in pyproject.toml using an isolated build # environment: - pip install --verbose --editable . + pip install --verbose . else + if [[ "$UNAMESTR" == "MINGW64"* ]]; then + # Needed on Windows CI to compile with the Visual Studio compiler, + # otherwise Meson detects a MINGW64 platform and uses the MINGW64 + # toolchain + ADDITIONAL_PIP_OPTIONS='-Csetup-args=--vsenv' + fi # Use the pre-installed build dependencies and build directly in the # current environment. - python setup.py develop + pip install --verbose --no-build-isolation --editable .
$ADDITIONAL_PIP_OPTIONS fi - ccache -s + ccache -s || echo "ccache not installed, skipping ccache statistics" } main() { diff --git a/build_tools/azure/install_pyodide.sh b/build_tools/azure/install_pyodide.sh new file mode 100644 index 0000000000000..58d0348a53202 --- /dev/null +++ b/build_tools/azure/install_pyodide.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -e + +git clone https://github.com/emscripten-core/emsdk.git +cd emsdk +./emsdk install $EMSCRIPTEN_VERSION +./emsdk activate $EMSCRIPTEN_VERSION +source emsdk_env.sh +cd - + +pip install pyodide-build==$PYODIDE_VERSION pyodide-cli + +pyodide build + +ls -ltrh dist + +# The Pyodide js library is needed by build_tools/azure/test_script_pyodide.sh +# to run tests inside Pyodide +npm install pyodide@$PYODIDE_VERSION diff --git a/build_tools/azure/install_win.sh b/build_tools/azure/install_win.sh deleted file mode 100755 index b28bc86270925..0000000000000 --- a/build_tools/azure/install_win.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -e -set -x - -# defines the get_dep and show_installed_libraries functions -source build_tools/shared.sh - -if [[ "$DISTRIB" == "conda" ]]; then - conda update -n base conda -y - conda install pip -y - pip install "$(get_dep conda-lock min)" - conda-lock install --name $VIRTUALENV $LOCK_FILE - source activate $VIRTUALENV -else - python -m venv $VIRTUALENV - source $VIRTUALENV/Scripts/activate - pip install -r $LOCK_FILE -fi - -show_installed_libraries - -# Build scikit-learn -python setup.py bdist_wheel - -# Install the generated wheel package to test it -pip install --pre --no-index --find-links dist scikit-learn diff --git a/build_tools/azure/linting.sh b/build_tools/azure/linting.sh deleted file mode 100755 index 21ef53c8012dc..0000000000000 --- a/build_tools/azure/linting.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -set -e -# pipefail is necessary to propagate exit codes -set -o pipefail - -flake8 --show-source . -echo -e "No problem detected by flake8\n" - -# For docstrings and warnings of deprecated attributes to be rendered -# properly, the property decorator must come before the deprecated decorator -# (else they are treated as functions) - -# do not error when grep -B1 "@property" finds nothing -set +e -bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated"` - -if [ ! -z "$bad_deprecation_property_order" ] -then - echo "property decorator should come before deprecated decorator" - echo "found the following occurrencies:" - echo $bad_deprecation_property_order - exit 1 -fi - -# Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE - -doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")" - -if [ ! -z "$doctest_directive" ] -then - echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:" - echo "$doctest_directive" - exit 1 -fi - -joblib_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/fixes.py")" - -if [ ! -z "$joblib_import" ]; then - echo "Use from sklearn.utils.fixes import delayed instead of joblib delayed. 
The following files contains imports to joblib.delayed:" - echo "$joblib_import" - exit 1 -fi diff --git a/build_tools/azure/posix-all-parallel.yml b/build_tools/azure/posix-all-parallel.yml new file mode 100644 index 0000000000000..45d2b4569110f --- /dev/null +++ b/build_tools/azure/posix-all-parallel.yml @@ -0,0 +1,50 @@ +# This configuration enables a job based on `posix.yml` to have two modes: +# +# 1. When `[azure parallel]` *is not* in the commit message, then this job will +# run first. If this job succeeds, then all dependent jobs can run. +# 2. When `[azure parallel]` *is* in the commit message, then this job will +# run with name `{{ parameters.name }}_Parallel` along with all other jobs. +# +# To enable this template, all dependent jobs should check if this job succeeded +# or was skipped by using: +# dependsOn: in(dependencies[{{ parameters.name }}]['result'], 'Succeeded', 'Skipped') + +parameters: + name: '' + vmImage: '' + matrix: [] + dependsOn: [] + condition: '' + commitMessage: '' + +jobs: + +# When [azure parallel] *is not* in the commit message, this job will run +# first. +- template: posix.yml + parameters: + name: ${{ parameters.name }} + vmImage: ${{ parameters.vmImage }} + matrix: ${{ parameters.matrix }} + dependsOn: ${{ parameters.dependsOn }} + condition: | + and( + ${{ parameters.condition }}, + not(contains(${{ parameters.commitMessage }}, '[azure parallel]')) + ) + +# When [azure parallel] *is* in the commit message, this job and dependent +# jobs will run in parallel. Implementation-wise, the job above is skipped and +# this job, named ${{ parameters.name }}_Parallel, will run in parallel with +# the other jobs. +- template: posix.yml + parameters: + name: ${{ parameters.name }}_Parallel + vmImage: ${{ parameters.vmImage }} + matrix: ${{ parameters.matrix }} + dependsOn: ${{ parameters.dependsOn }} + condition: | + and( + ${{ parameters.condition }}, + contains(${{ parameters.commitMessage }}, '[azure parallel]') + ) diff --git a/build_tools/azure/posix-docker.yml b/build_tools/azure/posix-docker.yml index 3b20382310f58..b00ca66c378ca 100644 --- a/build_tools/azure/posix-docker.yml +++ b/build_tools/azure/posix-docker.yml @@ -16,17 +16,12 @@ jobs: VIRTUALENV: 'testvenv' TEST_DIR: '$(Agent.WorkFolder)/tmp_folder' JUNITXML: 'test-data.xml' - OMP_NUM_THREADS: '2' - OPENBLAS_NUM_THREADS: '2' - CPU_COUNT: '2' SKLEARN_SKIP_NETWORK_TESTS: '1' PYTEST_XDIST_VERSION: 'latest' COVERAGE: 'false' - TEST_DOCSTRINGS: 'false' # Set in azure-pipelines.yml DISTRIB: '' DOCKER_CONTAINER: '' - SHOW_SHORT_SUMMARY: 'false' CREATE_ISSUE_ON_TRACKER: 'true' CCACHE_DIR: $(Pipeline.Workspace)/ccache CCACHE_COMPRESS: '1' @@ -60,23 +55,24 @@ jobs: - script: > docker container run --rm --volume $TEST_DIR:/temp_dir + --volume $BUILD_REPOSITORY_LOCALPATH:/repo_localpath --volume $PWD:/io --volume $CCACHE_DIR:/ccache -w /io --detach --name skcontainer + -e BUILD_SOURCESDIRECTORY=/io -e TEST_DIR=/temp_dir -e CCACHE_DIR=/ccache + -e BUILD_REPOSITORY_LOCALPATH=/repo_localpath + -e COVERAGE -e DISTRIB -e LOCK_FILE -e JUNITXML -e VIRTUALENV -e PYTEST_XDIST_VERSION - -e OMP_NUM_THREADS - -e OPENBLAS_NUM_THREADS -e SKLEARN_SKIP_NETWORK_TESTS -e SELECTED_TESTS - -e CPU_COUNT -e CCACHE_COMPRESS -e BUILD_SOURCEVERSIONMESSAGE -e BUILD_REASON @@ -89,6 +85,11 @@ jobs: - script: > docker exec skcontainer ./build_tools/azure/test_script.sh displayName: 'Test Library' + - script: > + docker exec skcontainer ./build_tools/azure/combine_coverage_reports.sh + condition: and(succeeded(),
eq(variables['COVERAGE'], 'true'), + eq(variables['SELECTED_TESTS'], '')) + displayName: 'Combine coverage' - task: PublishTestResults@2 inputs: testResultsFiles: '$(TEST_DIR)/$(JUNITXML)' @@ -123,3 +124,10 @@ jobs: JUNIT_FILE: $(TEST_DIR)/$(JUNITXML) condition: and(succeededOrFailed(), eq(variables['CREATE_ISSUE_ON_TRACKER'], 'true'), eq(variables['Build.Reason'], 'Schedule')) + - bash: bash build_tools/azure/upload_codecov.sh + condition: and(succeeded(), eq(variables['COVERAGE'], 'true'), + eq(variables['SELECTED_TESTS'], '')) + displayName: 'Upload To Codecov' + retryCountOnTaskFailure: 5 + env: + CODECOV_TOKEN: $(CODECOV_TOKEN) diff --git a/build_tools/azure/posix.yml b/build_tools/azure/posix.yml index f93cd6e211231..35e5165d22c83 100644 --- a/build_tools/azure/posix.yml +++ b/build_tools/azure/posix.yml @@ -16,17 +16,12 @@ jobs: TEST_DIR: '$(Agent.WorkFolder)/tmp_folder' VIRTUALENV: 'testvenv' JUNITXML: 'test-data.xml' - OMP_NUM_THREADS: '2' - OPENBLAS_NUM_THREADS: '2' - CPU_COUNT: '2' SKLEARN_SKIP_NETWORK_TESTS: '1' CCACHE_DIR: $(Pipeline.Workspace)/ccache CCACHE_COMPRESS: '1' PYTEST_XDIST_VERSION: 'latest' COVERAGE: 'true' - TEST_DOCSTRINGS: 'false' CREATE_ISSUE_ON_TRACKER: 'true' - SHOW_SHORT_SUMMARY: 'false' strategy: matrix: ${{ insert }}: ${{ parameters.matrix }} @@ -64,12 +59,18 @@ jobs: - script: | build_tools/azure/test_docs.sh displayName: 'Test Docs' - condition: eq(variables['SELECTED_TESTS'], '') + condition: and(succeeded(), eq(variables['SELECTED_TESTS'], '')) - script: | build_tools/azure/test_pytest_soft_dependency.sh displayName: 'Test Soft Dependency' - condition: and(eq(variables['CHECK_PYTEST_SOFT_DEPENDENCY'], 'true'), + condition: and(succeeded(), + eq(variables['CHECK_PYTEST_SOFT_DEPENDENCY'], 'true'), eq(variables['SELECTED_TESTS'], '')) + - script: | + build_tools/azure/combine_coverage_reports.sh + condition: and(succeeded(), eq(variables['COVERAGE'], 'true'), + eq(variables['SELECTED_TESTS'], '')) + displayName: 'Combine coverage' - task: PublishTestResults@2 inputs: testResultsFiles: '$(TEST_DIR)/$(JUNITXML)' @@ -105,5 +106,6 @@ jobs: condition: and(succeeded(), eq(variables['COVERAGE'], 'true'), eq(variables['SELECTED_TESTS'], '')) displayName: 'Upload To Codecov' + retryCountOnTaskFailure: 5 env: CODECOV_TOKEN: $(CODECOV_TOKEN) diff --git a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock deleted file mode 100644 index f07d4d274bf27..0000000000000 --- a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock +++ /dev/null @@ -1,109 +0,0 @@ -# Generated by conda-lock. 
-# platform: linux-64 -# input_hash: b8a0f3bd13671606365ba6bf6380fcc64a1188ae76d1d0999dda4e26371e7742 -@EXPLICIT -https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d79366c90128f5dc444be8 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2022.10.11-h06a4308_0.conda#e9b86b388e2cf59585fefca34037b783 -https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/linux-64/libgfortran4-7.5.0-ha8ba4b0_17.conda#e3883581cbf0a98672250c3e80d292bf -https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.5.0-ha8ba4b0_17.conda#ecb35c8952579d5c8dc56c6e076ba948 -https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd -https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd -https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 -https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/expat-2.4.9-h6a678d5_0.conda#3a6139fbcd96384855f0e6037502bf28 -https://repo.anaconda.com/pkgs/main/linux-64/giflib-5.2.1-h7b6447c_0.conda#c2583ad8de5051f19479580c58336f15 -https://repo.anaconda.com/pkgs/main/linux-64/icu-58.2-he6710b0_3.conda#48cc14d5ad1a9bcd8dac17211a8deb8b -https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h7f8727e_0.conda#a0571bd2254b360aef526307a17f3526 -https://repo.anaconda.com/pkgs/main/linux-64/lerc-3.0-h295c915_0.conda#b97309770412f10bed8d9448f6f98f87 -https://repo.anaconda.com/pkgs/main/linux-64/libdeflate-1.8-h7f8727e_5.conda#6942d65edab9a800900f43e750b3ad1f -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_2.conda#88a54b8f50e351c650e16f4ee781440c -https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.18-hf726d26_0.conda#10422bb3b9b022e27798fc368cda69ba -https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 -https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.2.4-h5eee18b_0.conda#f5f56389136bcd9ca92ee1d64afcceb3 -https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518dcadd6aaee9aae47ba9a671553 -https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.3-h295c915_1.conda#d9bd18f73ff566e08add10a54a3463cf -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.3-h5eee18b_3.conda#0c616f387885c1bbb57ec0bd1e779ced -https://repo.anaconda.com/pkgs/main/linux-64/nspr-4.33-h295c915_0.conda#78454e8819eb6701abc74b2ab2889f21 -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1s-h7f8727e_0.conda#25f9c4e2394976be98d01cccef2ce43a -https://repo.anaconda.com/pkgs/main/linux-64/pcre-8.45-h295c915_0.conda#b32ccc24d1d9808618c1e898da60f68d -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.6-h5eee18b_0.conda#8abc704d4a473839d5351b43deb793bb -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 -https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e -https://repo.anaconda.com/pkgs/main/linux-64/glib-2.69.1-h4ff587b_1.conda#4c3eae7c0b8b1c8fb3046a0740313bbf -https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20210910-h7f8727e_0.conda#cf16006f8f24e4224ddce196471d2509 
-https://repo.anaconda.com/pkgs/main/linux-64/libevent-2.1.12-h8f2d780_0.conda#8de03cd4b6ee0ddeb0571a5199db5637 -https://repo.anaconda.com/pkgs/main/linux-64/libllvm10-10.0.1-hbcb73fb_5.conda#198e840fc17a5bff7f1ee543ee1981b2 -https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.37-hbc83047_0.conda#689f903925dcf6c5ab7bc1de0f58b67b -https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.9.14-h74e7548_0.conda#2eafeb1cb5f00b034d150f3d70436e52 -https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 -https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.2-ha4553b6_0.conda#0e926a5f2e02fe4a9376ece4b732ce36 -https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4 -https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.12.1-h4a9f257_0.conda#bdc7b5952e9c5dca01bc2f4ccef2f974 -https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.0-h28cd5cc_2.conda#6af5d0cbd7310e1cd8a6a5c1c99649b2 -https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.19.2-hac12032_0.conda#62a43976b48799377103390c340a3824 -https://repo.anaconda.com/pkgs/main/linux-64/libclang-10.0.1-default_hb85057a_2.conda#9e39ee5217327ba25e341c629b642247 -https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.4.0-hecacb30_2.conda#debd52cb518dce3d4f48833cdc1032e4 -https://repo.anaconda.com/pkgs/main/linux-64/libxkbcommon-1.0.1-hfa300c1_0.conda#913e6c7c04026ff341960a9700889498 -https://repo.anaconda.com/pkgs/main/linux-64/libxslt-1.1.35-h4e12654_0.conda#328c111d87dccd5a3e471a691833f670 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.40.0-h5082296_0.conda#d1300b056e728ea61a0bf135b035e60d -https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.13.1-hef1e5e3_1.conda#104cd6f83a6edd3e1fd662887f4bc215 -https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.0-h8213a91_2.conda#838648422452405b86699e780e293c1d -https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff -https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.9-h16c4e8d_3.conda#0f127be216a734916faf456bb21404e9 -https://repo.anaconda.com/pkgs/main/linux-64/libwebp-1.2.4-h11a3e52_0.conda#971acc20767cc834a6baffdeaae6a100 -https://repo.anaconda.com/pkgs/main/linux-64/nss-3.74-h0370c37_0.conda#fb2426b2f3cb17c9015fcbdf917a2f7b -https://repo.anaconda.com/pkgs/main/linux-64/python-3.8.13-haa1d7c7_1.conda#43a2c043262c004b0ad1b77fca992639 -https://repo.anaconda.com/pkgs/main/linux-64/attrs-22.1.0-py38h06a4308_0.conda#51beb64c6f06b5a69529df7ecaccc3f9 -https://repo.anaconda.com/pkgs/main/linux-64/certifi-2022.9.24-py38h06a4308_0.conda#2c24987d7c70c1c4c3a8c0f0e744b853 -https://repo.anaconda.com/pkgs/main/noarch/charset-normalizer-2.0.4-pyhd3eb1b0_0.conda#e7a441d94234b2b5fafee06e25dbf076 -https://repo.anaconda.com/pkgs/main/linux-64/coverage-6.2-py38h7f8727e_0.conda#34a3006ca7d8d286b63593b31b845ace -https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab -https://repo.anaconda.com/pkgs/main/linux-64/cython-0.29.32-py38h6a678d5_0.conda#81e586e2923e84782265d5e34b2c7189 -https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 -https://repo.anaconda.com/pkgs/main/linux-64/idna-3.4-py38h06a4308_0.conda#e1c05a7fa231e08f357d92702689cbdd 
-https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 -https://repo.anaconda.com/pkgs/main/linux-64/joblib-1.1.1-py38h06a4308_0.conda#e655dfc29e36336810c9f69dea37b2de -https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.2-py38h295c915_0.conda#00e5f5a50b547c8c31d1a559828f3251 -https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.17.3-py38h2f8d375_0.conda#40edbb76ecacefb1e6ab639b514822b1 -https://repo.anaconda.com/pkgs/main/linux-64/pillow-9.2.0-py38hace64e9_1.conda#a6b7baf62d6399704dfdeab8c0ec55f6 -https://repo.anaconda.com/pkgs/main/linux-64/pluggy-1.0.0-py38h06a4308_1.conda#87bb1d3f6cf3e409a1dac38cee99918e -https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py38_0.conda#d6a69c576c6e4d19e3074eaae3d149f2 -https://repo.anaconda.com/pkgs/main/noarch/py-1.11.0-pyhd3eb1b0_0.conda#7205a898ed2abbf6e9b903dff6abe08e -https://repo.anaconda.com/pkgs/main/noarch/pycparser-2.21-pyhd3eb1b0_0.conda#135a72ff2a31150a3a3ff0b1edd41ca9 -https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-3.0.9-py38h06a4308_0.conda#becbbf51d2b05de228eed968e20f963d -https://repo.anaconda.com/pkgs/main/linux-64/pysocks-1.7.1-py38h06a4308_0.conda#21c67581f3a81ffbb02728eb2178d693 -https://repo.anaconda.com/pkgs/main/linux-64/pytz-2022.1-py38h06a4308_0.conda#d9e022584b586338e235e41a76ccc657 -https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h327a75a_7.conda#1868b206ade356f1812a723804e1cc31 -https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 -https://repo.anaconda.com/pkgs/main/noarch/threadpoolctl-2.2.0-pyh0d69192_0.conda#bbfdbae4934150b902f97daaf287efe2 -https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a -https://repo.anaconda.com/pkgs/main/linux-64/tomli-2.0.1-py38h06a4308_0.conda#791cce9de9913e9587b0a85cd8419123 -https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.2-py38h5eee18b_0.conda#db2f7ebc500d97a4af6889dfd0d03dbc -https://repo.anaconda.com/pkgs/main/linux-64/cffi-1.15.1-py38h74dc2b5_0.conda#ca2d78b41be0525b8d328c078dfadfb9 -https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.17.3-py38h7e8d029_0.conda#5f2b196b515f8fe6b37e3d224650577d -https://repo.anaconda.com/pkgs/main/noarch/packaging-21.3-pyhd3eb1b0_0.conda#07bbfbb961db7fa329cc42716943ea62 -https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 -https://repo.anaconda.com/pkgs/main/linux-64/qt-webengine-5.15.9-hd2b0992_4.conda#ed674e212597b93fffa1afc90a3e100c -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-65.5.0-py38h06a4308_0.conda#39a83921f08b25897e9e4d07f4d41179 -https://repo.anaconda.com/pkgs/main/linux-64/brotlipy-0.7.0-py38h27cfd23_1003.conda#e881c8ee8a4048f29da5d20f0330fe37 -https://repo.anaconda.com/pkgs/main/linux-64/cryptography-38.0.1-py38h9ce1e76_0.conda#1f179fad71e46b148b6f471770fa64f3 -https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.1.3-py38hef1b27d_0.conda#a7ad7d097c25b7beeb76f370d51687a1 -https://repo.anaconda.com/pkgs/main/linux-64/pandas-1.2.4-py38ha9443f7_0.conda#5bd3fd807a294f387feabc65821b75d0 -https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.1.2-py38h06a4308_0.conda#8d7f526a3d29273e06957d302f515755 -https://repo.anaconda.com/pkgs/main/linux-64/qtwebkit-5.212-h4eab89a_4.conda#7317bbf3f3e66a0a02b07b860783ecff -https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.3.2-py38he2b7bc3_0.conda#a9df91d5a41c1f39524fc8a53c56bc29 
-https://repo.anaconda.com/pkgs/main/linux-64/sip-6.6.2-py38h6a678d5_0.conda#cb3f0d10f7f79870945f4dbbe0000f92 -https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py38h79cecc1_0.conda#6e7f4f94000b244396de8bf4e6ae8dc4 -https://repo.anaconda.com/pkgs/main/noarch/pyopenssl-22.0.0-pyhd3eb1b0_0.conda#1dbbf9422269cd62c7094960d9b43f36 -https://repo.anaconda.com/pkgs/main/linux-64/pyqt5-sip-12.11.0-py38h6a678d5_1.conda#7bc403c7d55f1465e922964d293d2186 -https://repo.anaconda.com/pkgs/main/noarch/pytest-cov-3.0.0-pyhd3eb1b0_0.conda#bbdaac2947f507399816d509107945c2 -https://repo.anaconda.com/pkgs/main/noarch/pytest-forked-1.3.0-pyhd3eb1b0_0.tar.bz2#07970bffdc78f417d7f8f1c7e620f5c4 -https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.7-py38h6a678d5_1.conda#62232dc285be8e7e85ae9596d89b3b95 -https://repo.anaconda.com/pkgs/main/noarch/pytest-xdist-2.5.0-pyhd3eb1b0_0.conda#d15cdc4207bcf8ca920822597f1d138d -https://repo.anaconda.com/pkgs/main/linux-64/urllib3-1.26.12-py38h06a4308_0.conda#aa9ea62db989b3ba169a82c695eea20c -https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.1.3-py38_0.conda#70d5f6df438d469dc78f082389ada23d -https://repo.anaconda.com/pkgs/main/linux-64/requests-2.28.1-py38h06a4308_0.conda#04d482ea4a1e190d688dee2e4048e49f -https://repo.anaconda.com/pkgs/main/noarch/codecov-2.1.11-pyhd3eb1b0_0.conda#83a743cc928162d53d4066c43468b2c7 diff --git a/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock b/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock deleted file mode 100644 index 821e5f92ab51c..0000000000000 --- a/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock +++ /dev/null @@ -1,121 +0,0 @@ -# Generated by conda-lock. -# platform: win-64 -# input_hash: e176819d6d3155f9b8afd9e262f268db47cb5d6dc157a00168d3bd0c0f55766c -@EXPLICIT -https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2022.9.24-h5b45459_0.tar.bz2#5fba0abc60bf327a4bc4188cd64678be -https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2022.1.0-h57928b3_3787.tar.bz2#35dff2b6e944ce136a574c4c006cec28 -https://conda.anaconda.org/conda-forge/win-64/mkl-include-2022.1.0-h6a75c08_874.tar.bz2#414f6ab96ad71e7a95bd00d990fa3473 -https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2#b0309b72560df66f71a9d5e34a5efdfa -https://conda.anaconda.org/conda-forge/win-64/python_abi-3.8-3_cp38.conda#c6df946723dadd4a5830a8ff8c6b9a20 -https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_0.tar.bz2#72608f6cd3e5898229c3ea16deb1ac43 -https://conda.anaconda.org/conda-forge/win-64/m2w64-gmp-6.1.0-2.tar.bz2#53a1c73e1e3d185516d7e3af177596d9 -https://conda.anaconda.org/conda-forge/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2#774130a326dee16f1ceb05cc687ee4f0 -https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.32.31332-h1d6e394_9.tar.bz2#c98b6e39006315599b793592bcc3c978 -https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2#4289d80fb4d272f1f3b56cfe87ac90bd -https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h3d8a991_9.tar.bz2#ba28983ef4f6d430827d0e7c5cdd7b48 -https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h8ffe710_4.tar.bz2#7c03c66026944073040cb19a4f3ec3c9 -https://conda.anaconda.org/conda-forge/win-64/icu-70.1-h0e60522_0.tar.bz2#64073396a905b6df895ab2489fae3847 -https://conda.anaconda.org/conda-forge/win-64/jpeg-9e-h8ffe710_2.tar.bz2#733066523147548ce368a9bd0c8395af -https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2#1900cb3cab5055833cfddb0ba233b074 
-https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.0.9-hcfcfb64_8.tar.bz2#e8078e37208cd7d3e1eb5053f370ded8 -https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.14-hcfcfb64_0.tar.bz2#4366e00d3270eb229c026920474a6dda -https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2#2c96d1b6915b408893f9472569dee135 -https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-h8ffe710_0.tar.bz2#050119977a86e4856f0416e2edcf81bb -https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.4-h8ffe710_1.tar.bz2#04286d905a0dcb7f7d4a12bdfe02516d -https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.40.0-hcfcfb64_0.tar.bz2#5e5a97795de72f8cc3baf3d9ea6327a2 -https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.2.4-h8ffe710_0.tar.bz2#0a09bd195ebeaff5711ccae93ac132ad -https://conda.anaconda.org/conda-forge/win-64/libzlib-1.2.13-hcfcfb64_4.tar.bz2#0cc5c5cc64ee1637f37f8540a175854c -https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2#066552ac6b907ec6d72c0ddab29050dc -https://conda.anaconda.org/conda-forge/win-64/openssl-1.1.1s-hcfcfb64_0.tar.bz2#d5bc4691e3b8f238964208ed8b894a00 -https://conda.anaconda.org/conda-forge/win-64/tbb-2021.7.0-h91493d7_0.tar.bz2#f57be598137919e4f7e7d159960d66a1 -https://conda.anaconda.org/conda-forge/win-64/tk-8.6.12-h8ffe710_0.tar.bz2#c69a5047cc9291ae40afd4a1ad6f0c0f -https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2#515d77642eaa3639413c6b1bc3f94219 -https://conda.anaconda.org/conda-forge/win-64/gettext-0.21.1-h5728263_0.tar.bz2#299d4fd6798a45337042ff5a48219e5f -https://conda.anaconda.org/conda-forge/win-64/krb5-1.19.3-h1176d77_0.tar.bz2#2e0d447ab95d58d3ea1222121ec57f9f -https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.0.9-hcfcfb64_8.tar.bz2#99839d9d81f33afa173c0fa82a702038 -https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.0.9-hcfcfb64_8.tar.bz2#88e62627120c20289bf8982b15e0a6a1 -https://conda.anaconda.org/conda-forge/win-64/libclang13-15.0.5-default_h77d9078_0.tar.bz2#200796292aff4e7547eaf373872baa39 -https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.39-h19919ed_0.conda#ab6febdb2dbd9c00803609079db4de71 -https://conda.anaconda.org/conda-forge/win-64/libvorbis-1.3.7-h0e60522_0.tar.bz2#e1a22282de0169c93e4ffe6ce6acc212 -https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de -https://conda.anaconda.org/conda-forge/win-64/mkl-2022.1.0-h6a75c08_874.tar.bz2#2ff89a7337a9636029b4db9466e9f8e3 -https://conda.anaconda.org/conda-forge/win-64/pcre2-10.40-h17e33f8_0.tar.bz2#2519de0d9620dc2bc7e19caf6867136d -https://conda.anaconda.org/conda-forge/win-64/python-3.8.15-h0269646_0_cpython.conda#c357e563492a7239723e3bf192151780 -https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.2-h7755175_4.tar.bz2#13acb3626fcc8c0577249f3a7b6129f4 -https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.0.9-hcfcfb64_8.tar.bz2#e18b70ed349d96086fd60a9c642b1b58 -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-2.1.1-pyhd8ed1ab_0.tar.bz2#c1d5b294fbf9a795dec349a6f4d8be8e -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 
-https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/win-64/cython-0.29.32-py38hd3f51b4_1.tar.bz2#cae84cafa303ba6c676bdcc3047bfa08 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-h546665d_0.tar.bz2#8bfa20ad87170f94e856133bafa5f5cf -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.4-py38hb1fd069_1.tar.bz2#1dcc50e3241f9e4e59713eec2653abd5 -https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-16_win64_mkl.tar.bz2#d2e6f4e86cee2b4e8c27ff6884ccdc61 -https://conda.anaconda.org/conda-forge/win-64/libclang-15.0.5-default_h77d9078_0.tar.bz2#1f36af7abc82c6b89f13b574450ac3b2 -https://conda.anaconda.org/conda-forge/win-64/libglib-2.74.1-he8f3873_1.tar.bz2#09e1cbabfd9d733729843c3b35cb0b6d -https://conda.anaconda.org/conda-forge/win-64/libtiff-4.4.0-h8e97e67_4.tar.bz2#3ef0d0259b2d742e8c6a07598614a5d6 -https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2022.1.0-h57928b3_875.tar.bz2#6319a06307af296c1dfae93687c283b2 -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pycparser-2.21-pyhd8ed1ab_0.tar.bz2#076becd9e05608f8dc72757d5f3a91ff -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/win-64/tornado-6.2-py38h91455d4_1.tar.bz2#ed09a022d62a1550692f856c104d929e -https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.0.0-py38h91455d4_0.tar.bz2#7a135e40d9f26c15419e5e82e1c436c0 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.38.4-pyhd8ed1ab_0.tar.bz2#c829cfb8cb826acb9de0ac1a2df0a940 -https://conda.anaconda.org/conda-forge/noarch/win_inet_pton-1.1.0-pyhd8ed1ab_6.tar.bz2#30878ecc4bd36e8deeea1e3c151b2e0b -https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.9-hcd874cb_0.tar.bz2#9cef622e75683c17d05ae62d66e69e6c 
-https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece -https://conda.anaconda.org/conda-forge/win-64/brotli-1.0.9-hcfcfb64_8.tar.bz2#2e661f21e1741c11506bdc7226e6b0bc -https://conda.anaconda.org/conda-forge/win-64/cffi-1.15.1-py38h57701bc_2.tar.bz2#4e290e24ff3aa60183f928d4e144c4fb -https://conda.anaconda.org/conda-forge/win-64/coverage-6.5.0-py38h91455d4_1.tar.bz2#7ba1bb13999b89fdce5f3385d5e28c2b -https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.74.1-h12be248_1.tar.bz2#cd93cc622f2fa0f68ddc978cb67a5061 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/win-64/lcms2-2.14-h90d422f_0.tar.bz2#a0deec92aa16fca7bf5a6717d05f88ee -https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-16_win64_mkl.tar.bz2#14c2fb03b2bb14dfa3806186ca91d557 -https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-16_win64_mkl.tar.bz2#be2f9d5712a5bb05cd900005ee752a05 -https://conda.anaconda.org/conda-forge/win-64/libxcb-1.13-hcd874cb_1004.tar.bz2#a6d7fd030532378ecb6ba435cd9f8234 -https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.0-hc9384bd_1.tar.bz2#a6834096f8d834339eca7ef4d23bcc44 -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/noarch/pip-22.3.1-pyhd8ed1ab_0.tar.bz2#da66f2851b9836d3a7c5190082a45f7d -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyh0701188_6.tar.bz2#56cd9fe388baac0e90c7149cfac95b60 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/win-64/brotlipy-0.7.0-py38h91455d4_1005.tar.bz2#9fabc7fadfb37addbe91cc67c09cda69 -https://conda.anaconda.org/conda-forge/win-64/cryptography-38.0.3-py38h086c683_0.tar.bz2#0831ec95eedb26f5ab4066171f267920 -https://conda.anaconda.org/conda-forge/win-64/fonttools-4.38.0-py38h91455d4_1.tar.bz2#45aa8e4d44d4b82db1ba373b6b7fbd61 -https://conda.anaconda.org/conda-forge/win-64/glib-2.74.1-h12be248_1.tar.bz2#7564888ab882b9d3aea46355ab7adaca -https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-16_win64_mkl.tar.bz2#983e827b7c9562075c2e74d596d056c1 -https://conda.anaconda.org/conda-forge/win-64/numpy-1.23.5-py38h90ce339_0.conda#e393f5a46fb6402723f63b7039a4e40f -https://conda.anaconda.org/conda-forge/win-64/pillow-9.2.0-py38h3cd753b_3.tar.bz2#484d635897a9e98e99d161289c4dbaf5 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/win-64/sip-6.7.5-py38hd3f51b4_0.conda#99a5d7532da18344a6648dd8e0f0e270 -https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-16_win64_mkl.tar.bz2#dc89c75a7dd26c88ac77d64bf313973e -https://conda.anaconda.org/conda-forge/win-64/contourpy-1.0.6-py38hb1fd069_0.tar.bz2#caaff6619b92a1fa2f7fa07292010550 -https://conda.anaconda.org/conda-forge/win-64/gstreamer-1.21.2-h6b5321d_0.conda#19a9f9ee43fcfedbf72ed09656601bc9 -https://conda.anaconda.org/conda-forge/noarch/pyopenssl-22.1.0-pyhd8ed1ab_0.tar.bz2#fbfa0a180d48c800f922a10a114a8632 -https://conda.anaconda.org/conda-forge/win-64/pyqt5-sip-12.11.0-py38hd3f51b4_2.tar.bz2#cbc432ec0d62367c7d9d7f486207712a -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.0.0-pyhd8ed1ab_0.tar.bz2#c9e3f8bfdb9bfc34aa1836a6ed4b25d7 
-https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/win-64/scipy-1.9.3-py38h0f6ee2a_2.tar.bz2#92cb8018ca3747eb8502e22d78eed95f -https://conda.anaconda.org/conda-forge/win-64/blas-2.116-mkl.tar.bz2#7529860b43278247a278c6f56a191d2e -https://conda.anaconda.org/conda-forge/win-64/gst-plugins-base-1.21.2-h001b923_0.conda#e46a55a23deb80b07ad1005fc787a16d -https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.6.2-py38h528a6c7_0.tar.bz2#c72de8aadeb6468b23ccfd5be1107c3b -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 -https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.6-h9c3277a_2.conda#cd3a8cc5c3740613a34c2f8553150f2d -https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_1.tar.bz2#089382ee0e2dc2eae33a04cc3c2bddb0 -https://conda.anaconda.org/conda-forge/noarch/codecov-2.1.12-pyhd8ed1ab_0.conda#0317ed52e504b93da000e8a027628775 -https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.7-py38hd6c051e_2.tar.bz2#b33fbea51980ecf275cef2262711f1ad -https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.6.2-py38haa244fe_0.tar.bz2#8e5672391509eae8501a952f4147fd2b diff --git a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock deleted file mode 100644 index 2922898a5e6ed..0000000000000 --- a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock +++ /dev/null @@ -1,133 +0,0 @@ -# Generated by conda-lock. -# platform: linux-64 -# input_hash: 75dcb70ec40f9bd38136e66f4911ac8da8c539671a03f9d9b8b802ba1b6fafd8 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.9.24-ha878542_0.tar.bz2#41e4e87062433e283696cf384f952ef6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.39-hcc3a1bd_1.conda#737be0d34c22d24432049ab7a3214de4 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-12.2.0-h337968e_19.tar.bz2#164b4b1acaedc47ee7e658ae6b308ca3 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-12.2.0-h46fd767_19.tar.bz2#1030b1f38c129f2634eae026f704fe60 -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.8-3_cp38.conda#2f3f7af062b42d664117662612022204 -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-12.2.0-h69a702a_19.tar.bz2#cd7a806282c16e1f2d39a7e80d3a3e0d -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab 
-https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-12.2.0-h65d4601_19.tar.bz2#e4c94f80aef025c17ab0828cd85ef535 -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.3.2-h166bdaf_0.tar.bz2#b7607b7b62dce55c194ad84f99464e5f -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-h27087fc_0.tar.bz2#c4fbad8d4bddeb3c085f18cbf97fbfad -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/icu-69.1-h9c3ff4c_0.tar.bz2#e0773c9556d588b062a4e1424a6a02fa -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 -https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.14-h166bdaf_0.tar.bz2#fc84a0446e4e4fb882e78d786cfb9734 -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 -https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.21-pthreads_h78a6416_3.tar.bz2#8c5963a49b6035c40646a763293fbb35 -https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.32.1-h7f98852_1000.tar.bz2#772d69f030955d9646d3d0eaf21d859d -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.4-h166bdaf_0.tar.bz2#ac2ccf7323d21f2994e4d1f5da664f37 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.3-h27087fc_1.tar.bz2#4acfc691e64342b9dae57cf2adc63238 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.32-h9c3ff4c_1.tar.bz2#29ded371806431b0499aaee146abfc3e -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.7-h166bdaf_0.tar.bz2#d1ad1824c71e67dea42f07e06cd177dc -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.9-h7f98852_0.tar.bz2#bf6f803a544f26ebbdc3bfff272eb179 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_openblas.tar.bz2#d9b7a8639171f6c6fa0a983edabcfe2b 
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.10-h28343ad_4.tar.bz2#4a049fc560e00e43151dc51368915fdd -https://conda.anaconda.org/conda-forge/linux-64/libllvm13-13.0.1-hf817b99_2.tar.bz2#47da3ce0d8b2e65ccb226c186dd91eba -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.40.0-h753d276_0.tar.bz2#2e5f9a37d487e1019fd4d8113adb2f9f -https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-15.0.5-he0ac6c6_0.tar.bz2#5c4783b468153a1d8f33874c5bb55864 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.31-h26416b9_0.tar.bz2#6c531bc30d49ae75b9c7c7f65bd62e3c -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.21-pthreads_h320a7e8_3.tar.bz2#29155b9196b9d78022f11d86733e25a7 -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b -https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h6239696_4.tar.bz2#adcf0be7897e73e312bd24353b613f74 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.7.3-h2599c5e_0.tar.bz2#4feea9466084c6948bd59539f1c0bb72 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_0.tar.bz2#4e54cbfc47b8c74c2ecc1e7730d8edce -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.19.3-h08a2579_0.tar.bz2#d25e05e7ee0e302b52d24491db4891eb -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_openblas.tar.bz2#20bae26d0a1db73f758fc3754cab4719 -https://conda.anaconda.org/conda-forge/linux-64/libclang-13.0.1-default_hc23dcda_0.tar.bz2#8cebb0736cba83485b13dc10d242d96d -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.74.1-h606061b_1.tar.bz2#ed5349aa96776e00b34eccecf4a948fe -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_openblas.tar.bz2#955d993f41f9354bf753d29864ea20ad -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h55922b4_4.tar.bz2#901791f0ec7cddc8714e76e273013a91 -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.12-h885dcf4_1.tar.bz2#d1355eaa48f465782f228275a0a69771 -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.31-hbc51c84_0.tar.bz2#da9633eee814d4e910fe42643a356315 -https://conda.anaconda.org/conda-forge/linux-64/python-3.8.15-h4a9ceb5_0_cpython.conda#dc29a8a79d0f2c80004cc06d3190104f -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.40.0-h4ff8645_0.tar.bz2#bb11803129cbbb53ed56f9506ff74145 
-https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.32-py38hfa26641_1.tar.bz2#eef241f25124f2f486f9994bcbf19751 -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.1-hc2a2eb6_0.tar.bz2#78415f0180a8d9c5bcc47889e00d5fb1 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.74.1-h6239696_1.tar.bz2#5f442e6bc9d89ba236eb25a25c5c2815 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py38h43d8883_1.tar.bz2#41ca56d5cac7bfc7eb4fcdbee878eb84 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.14-h6ed2654_0.tar.bz2#dcc588839de1445d90995a0a2c4f3a39 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_openblas.tar.bz2#823ceb5567e1a595deb643fcd17aed5a -https://conda.anaconda.org/conda-forge/linux-64/libpq-14.5-he2d8382_1.tar.bz2#c194811a2d160ef3210218ee508b6075 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.0.3-he3ba5ed_0.tar.bz2#f9dbabc7e01c459ed7a1d1d64b206e9b -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.78-h2350873_0.tar.bz2#ab3df39f96742e6f1a9878b09274c1dc -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.23.5-py38h7042d01_0.conda#d5a3620cd8c1af4115120f21d678507a -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h7d73246_1.tar.bz2#a11b4df9271a8d7917686725aa04c8f2 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-4.19.18-py38h709712a_8.tar.bz2#11b72f5b1cc15427c89232321172a0bc -https://conda.anaconda.org/conda-forge/noarch/pytz-2022.6-pyhd8ed1ab_0.tar.bz2#b1f26ad83328e486910ef7f6e81dc061 -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 
-https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py38h0a891b7_1.tar.bz2#358beb228a53b5e1031862de3525d1d3 -https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py38h0a891b7_0.tar.bz2#44421904760e9f5ae2035193e04360f0 -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-16_linux64_openblas.tar.bz2#519562d6176dab9c2ab9a8336a14c8e7 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.6-py38h43d8883_0.tar.bz2#1107ee053d55172b26c4fc905dd0238e -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.38.0-py38h0a891b7_1.tar.bz2#62c89ddefed9c5835e228a32b357a28d -https://conda.anaconda.org/conda-forge/linux-64/glib-2.74.1-h6239696_1.tar.bz2#f3220a9e9d3abcbfca43419a219df7e4 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py38h9eb91d8_3.tar.bz2#61dc7b3140b7b79b1985b53d52726d74 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.9.3-py38h8ce737c_2.tar.bz2#dfd81898f0c6e9ee0c22305da6aa443e -https://conda.anaconda.org/conda-forge/linux-64/blas-2.116-openblas.tar.bz2#02f34bcf0aceb6fae4c4d1ecb71c852a -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.20.3-hd4edc92_2.tar.bz2#153cfb02fb8be7dd7cabcbcb58a63053 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.6.2-py38hb021067_0.tar.bz2#72422499195d8aded0dfd461c6e3e86f -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.5.2-py38h8f669ce_0.conda#dbc17622f9d159be987bd21959d5494e -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py38h4e30db6_2.tar.bz2#71e8ccc750d0e6e9a55c63bc39a4e5b8 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.20.2-hcf0ee16_0.tar.bz2#79d7fca692d224dc29a72bda90f78a7b -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-5.12.9-h1304e3e_6.tar.bz2#f2985d160b8c43dd427923c04cd732fe -https://conda.anaconda.org/conda-forge/linux-64/pyqt-impl-5.12.3-py38h0ffb2e6_8.tar.bz2#acfc7625a212c27f7decdca86fdb2aba -https://conda.anaconda.org/conda-forge/linux-64/pyqtchart-5.12-py38h7400c14_8.tar.bz2#78a2a6cb4ef31f997c1bee8223a9e579 -https://conda.anaconda.org/conda-forge/linux-64/pyqtwebengine-5.12.1-py38h7400c14_8.tar.bz2#857894ea9c5e53c962c3a0932efa71ea -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38h578d9bd_8.tar.bz2#88368a5889f31dff922a2d57bbfc3f5b -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.6.2-py38h578d9bd_0.tar.bz2#e1a19f0d4686a701d4a4acce2b625acb diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock index 569ad944f7037..bf5bcd3daff08 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock @@ -1,171 +1,221 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: e59a40b88334d702327a777b695d15c65c6ff904d742abc604e894d78faca06e +# input_hash: 2622dc7361d0af53cfb31534b939a13e48192a3260137ba4ec20083659c2e5fa @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.9.24-ha878542_0.tar.bz2#41e4e87062433e283696cf384f952ef6 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.39-hcc3a1bd_1.conda#737be0d34c22d24432049ab7a3214de4 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-12.2.0-h337968e_19.tar.bz2#164b4b1acaedc47ee7e658ae6b308ca3 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-12.2.0-h46fd767_19.tar.bz2#1030b1f38c129f2634eae026f704fe60 -https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2022.1.0-h84fe81f_915.tar.bz2#2dcd1acca05c11410d4494d7fc7dfa2a -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.11-3_cp311.conda#c2e2630ddb68cf52eec74dc7dfab20b5 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2022f-h191b570_0.tar.bz2#e366350e2343a798e29833286abe2560 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.11-4_cp311.conda#d786502c97404c94d7d58d258a445a65 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-12.2.0-h69a702a_19.tar.bz2#cd7a806282c16e1f2d39a7e80d3a3e0d https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-12.2.0-h65d4601_19.tar.bz2#e4c94f80aef025c17ab0828cd85ef535 -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 
-https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-h27087fc_0.tar.bz2#c4fbad8d4bddeb3c085f18cbf97fbfad -https://conda.anaconda.org/conda-forge/linux-64/fftw-3.3.10-nompi_hf0379b8_105.tar.bz2#9d3e01547ba04a57372beee01158096f -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-orc-0.4.33-h166bdaf_0.tar.bz2#879c93426c9d0b84a9de4513fbce5f4f -https://conda.anaconda.org/conda-forge/linux-64/icu-70.1-h27087fc_0.tar.bz2#87473a15119779e021c314249d4b4aed -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.9.0-hd590300_0.conda#71b89db63b5b504e7afc8ad901172e1e +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.28.1-hd590300_0.conda#dcde58ff9a1f30b0037a2315d1846d1f +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-he1b5a44_1004.tar.bz2#cddaf2c63ea4a5901cf09524c490ecdc +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdb-6.2.32-h9c3ff4c_0.tar.bz2#3f3258d8f841fbac63b36b75bdac1afd -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.14-h166bdaf_0.tar.bz2#fc84a0446e4e4fb882e78d786cfb9734 +https://conda.anaconda.org/conda-forge/linux-64/libabseil-20230125.3-cxx17_h59595ed_0.conda#d1db1b8be7c3a8983dcbbbfe4f0765de +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_9.conda#61641e239f96eae2b8492dc7e755828c +https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 
+https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libnuma-2.0.18-h4ab18f5_2.conda#a263760479dbc7bc1f3df12707bd90dc https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libtool-2.4.6-h9c3ff4c_1008.tar.bz2#16e143a1ed4b4fd169536373957f6fee -https://conda.anaconda.org/conda-forge/linux-64/libudev1-252-h166bdaf_0.tar.bz2#174243089ec111479298a5b7099b64b5 -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.32.1-h7f98852_1000.tar.bz2#772d69f030955d9646d3d0eaf21d859d -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.4-h166bdaf_0.tar.bz2#ac2ccf7323d21f2994e4d1f5da664f37 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.3-h9c3ff4c_1.tar.bz2#fbe97e8fa6f275d7c76a09e795adc3e6 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.30.2-h27087fc_1.tar.bz2#2fe2a839394ef3a1825a5e5e296060bc -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.3-h27087fc_1.tar.bz2#4acfc691e64342b9dae57cf2adc63238 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.32-h9c3ff4c_1.tar.bz2#29ded371806431b0499aaee146abfc3e -https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1s-h166bdaf_0.tar.bz2#e17553617ce05787d97715177be014d1 +https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.8.0-h166bdaf_0.tar.bz2#ede4266dc02e875fe1ea77b25dd43747 +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 +https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-hd590300_0.conda#c0f3abb4a16477208bbd43a39bd56f18 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 
https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.7.0-h924138e_0.tar.bz2#819421f81b127a5547bf96ad57eccdd9 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.9-h7f98852_0.tar.bz2#bf6f803a544f26ebbdc3bfff272eb179 +https://conda.anaconda.org/conda-forge/linux-64/rdma-core-28.9-h59595ed_1.conda#aeffb7c06b5f65e55e6c637408dc4100 +https://conda.anaconda.org/conda-forge/linux-64/re2-2023.03.02-h8c504da_0.conda#206f8fa808748f6e90599c3368a1114e +https://conda.anaconda.org/conda-forge/linux-64/sleef-3.5.1-h9b69904_2.tar.bz2#6e016cf4c525d04a7bd038cee53ad3fd +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.10-hdb0a2a9_1.conda#78b8b85bdf1f42b8a2b3cb577d8742d1 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.66-ha37c62d_0.tar.bz2#2d7665abd0997f1a6d4b7596bc27b657 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.6.1-hc309b26_1.conda#cc09293a2c2b7fd77aff284f370c12c0 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.2.17-h4d4d85c_2.conda#9ca99452635fe03eb5fa937f5ae604b0 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.1.12-h4d4d85c_1.conda#eba092fc6de212a01de0065f38fe8bbb +https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.1.17-h4d4d85c_1.conda#30f9df85ce23cd14faa9a4dfa50cca2b +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/glog-0.6.0-h6f12383_0.tar.bz2#b31f3565cb84435407594e548a2fb7b2 +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_9.conda#081aa22f4581c08e4372b0b6c2f8478e +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_9.conda#1f0a03af852a9659ed2bf08f2f1704fd +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 
-https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.10-h9b69904_4.tar.bz2#390026683aef81db27ff1b8570ca1336 -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.45-hc0c96e0_0.tar.bz2#839aeb24ab885a7b902247a6d943d02f -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.40.0-h753d276_0.tar.bz2#2e5f9a37d487e1019fd4d8113adb2f9f +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.21.12-hfc55251_2.conda#e3a7d4ba09b8dc939b98fef55f539220 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.10.3-h7463322_0.tar.bz2#3b933ea47ef8f330c4c068af25fcd6a8 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-15.0.5-he0ac6c6_0.tar.bz2#5c4783b468153a1d8f33874c5bb55864 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.31-haf5c9bc_0.tar.bz2#0249d755f8d26cb2ac796f9f01cfb823 -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b -https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h6239696_4.tar.bz2#adcf0be7897e73e312bd24353b613f74 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.7.3-h2599c5e_0.tar.bz2#4feea9466084c6948bd59539f1c0bb72 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_0.tar.bz2#4e54cbfc47b8c74c2ecc1e7730d8edce -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.19.3-h3790be6_0.tar.bz2#7d862b05445123144bec92cb1acc8ef8 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.74.1-h606061b_1.tar.bz2#ed5349aa96776e00b34eccecf4a948fe -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.5-h63197d8_0.tar.bz2#339faf1a5e13c0d4abab84405847ad13 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.1.0-h27087fc_0.tar.bz2#02fa0b56a57c8421d1195bf0c021e682 
-https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h55922b4_4.tar.bz2#901791f0ec7cddc8714e76e273013a91 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.0.3-he3ba5ed_0.tar.bz2#f9dbabc7e01c459ed7a1d1d64b206e9b -https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.1.0-h84fe81f_915.tar.bz2#b9c8f925797a93dbff45e1626b025a6b -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.31-h28c427c_0.tar.bz2#455d44a05123f30f66af2ca2a9652b5f -https://conda.anaconda.org/conda-forge/linux-64/python-3.11.0-h582c2e5_0_cpython.tar.bz2#ac6e08a5519c81473b4f962660d36608 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.40.0-h4ff8645_0.tar.bz2#bb11803129cbbb53ed56f9506ff74145 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-h166bdaf_0.tar.bz2#384e7fcb3cd162ba3e4aed4b687df566 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h166bdaf_0.tar.bz2#637054603bb7594302e3bf83f0a99879 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-h166bdaf_0.tar.bz2#732e22f1741bccea861f5668cf7342a7 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h166bdaf_0.tar.bz2#0a8e20a8aef954390b9481a527421a8c -https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-2.1.1-pyhd8ed1ab_0.tar.bz2#c1d5b294fbf9a795dec349a6f4d8be8e +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_0.conda#5d801a4906adc712d480afc362623b59 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/s2n-1.3.49-h06160fa_0.conda#1d78349eb26366ecc034a4afe70a8534 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/ucx-1.14.1-h64cca9d_5.conda#39aa3b356d10d7e5add0c540945a0944 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.13.32-he9a53bd_1.conda#8a24e5820f4a0ffd2ed9c4722cd5d7ca +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_9.conda#d47dee1856d9cb955b8076eeff304a5b +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 
+https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.54.3-hb20ce57_0.conda#7af7c59ab24db007dfd82e0a3a343f66 +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.10.0-default_h2fb2949_1000.conda#7e3726e647a619c6ce5939014dfde86d +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.5-hb77312f_0.conda#efd221d3668077ca067a206269418dec +https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.18.1-h8fd135c_2.conda#bbf65f7688512872f063810623b755dc +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.5-ha31de31_0.conda#b923cdb6e567ada84f991ffcc5848afb +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad +https://conda.anaconda.org/conda-forge/linux-64/orc-1.9.0-h2f23424_1.conda#9571eb3eb0f7fe8b59956a7786babbcd +https://conda.anaconda.org/conda-forge/linux-64/python-3.11.9-hb806964_0_cpython.conda#ac68acfa8b558ed406c75e98d3428d7b +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/array-api-compat-1.6-pyhd8ed1ab_0.conda#f04c36d7284243a7d982b4ef4982eb23 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.3.1-h2e3709c_4.conda#2cf21b1cbc1c096a28ffa2892257a2c1 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.7.11-h00aa349_4.conda#cb932dff7328ff620ce8059c9968b095 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_9.conda#4601544b4982ba1861fa9b9c607b2c06 +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.32-py311ha362b79_1.tar.bz2#b24f3bc51bda5364df92f39b9256a2a6 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py311hb755f60_0.conda#f3a8a500a2e743ff92f418f0eaf9bf71 https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d 
-https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.1-hc2a2eb6_0.tar.bz2#78415f0180a8d9c5bcc47889e00d5fb1 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.74.1-h6239696_1.tar.bz2#5f442e6bc9d89ba236eb25a25c5c2815 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/linux-64/jack-1.9.21-he978b8e_1.tar.bz2#5cef21ebd70a90a0d28127543a8d3739 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py311h4dd048b_1.tar.bz2#46d451f575392c01dc193069bd89766d -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.14-h6ed2654_0.tar.bz2#dcc588839de1445d90995a0a2c4f3a39 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_mkl.tar.bz2#85f61af03fd291dae33150ffe89dc09a -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.5-default_h3a83d3e_0.tar.bz2#ae4ab2853ffd9165ac91e91f64e4539d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h3e49a29_2.tar.bz2#3b88f1d0fe2580594d58d7e44d664617 -https://conda.anaconda.org/conda-forge/linux-64/libpq-14.5-hd77ab85_1.tar.bz2#f5c8135a70758d928a8126998a6558d8 -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-252-h2a991cd_0.tar.bz2#3c5ae9f61f663b3d5e1bf7f7da0c85f5 -https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2022.1.0-ha770c72_916.tar.bz2#69ba49e445f87aea2cba343a71a35ca2 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py311h9547e67_1.conda#2c65bdf442b0d37aad080c8a4e0d452f +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.5-default_h5d6823c_0.conda#60c39a00b694c98da03f67a3ba1d7499 +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.7.1-hca28451_0.conda#755c7f876815003337d2c61ff5d047e5 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 
-https://conda.anaconda.org/conda-forge/linux-64/nss-3.78-h2350873_0.tar.bz2#ab3df39f96742e6f1a9878b09274c1dc -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h7d73246_1.tar.bz2#a11b4df9271a8d7917686725aa04c8f2 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pycparser-2.21-pyhd8ed1ab_0.tar.bz2#076becd9e05608f8dc72757d5f3a91ff -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/pytz-2022.6-pyhd8ed1ab_0.tar.bz2#b1f26ad83328e486910ef7f6e81dc061 -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.12.0-h00ab1b0_0.conda#f1b776cff1b426e7e7461a8502a3b731 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py311hd4cff14_1.tar.bz2#4d86cd6dbdc1185f4e72d974f1f1f852 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h166bdaf_0.tar.bz2#c9b568bd804cb2903c6be6f5f68182e4 -https://conda.anaconda.org/conda-forge/linux-64/cffi-1.15.1-py311h409f033_2.tar.bz2#675a030b42ca1ee616e47ab208c39dff -https://conda.anaconda.org/conda-forge/linux-64/coverage-6.5.0-py311hd4cff14_1.tar.bz2#f59fc994658549d52497cb29f34b75a6 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.38.0-py311hd4cff14_1.tar.bz2#871b97970cf7420780f79a62fef8eb48 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.74.1-h6239696_1.tar.bz2#f3220a9e9d3abcbfca43419a219df7e4 
-https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py311h459d7ec_0.conda#cc7727006191b8f3630936b339a76cd0 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.11.0-pyha770c72_0.conda#6ef2fc37559256cf682d8b3375e89b80 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.7.3-h28f7589_1.conda#97503d3e565004697f1651753aa95b9e +https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.9.3-hb447be9_1.conda#c520669eb0be9269a5f0d8ef62531882 +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/coverage-7.5.1-py311h331c9d8_0.conda#9f35e13e3b9e05e153b78f42662061f6 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py311h459d7ec_0.conda#17e1997cc17c571d5ad27bd0159f616c +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.12.0-hac9eb74_1.conda#0dee716254497604762957076ac76540 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.2.1-h84fe81f_16997.conda#a7ce56d5757f5b57e7daabe703ade5bb +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py311h18e6fac_0.conda#6c520a9d36c9d7270988c7a6c360d6d4 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py311hb755f60_0.conda#02336abab4cb5dd794010ef53c54bd09 +https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.3.14-hf3aad02_1.conda#a968ffa7e9fe0c257628033d393e512f +https://conda.anaconda.org/conda-forge/linux-64/blas-1.0-mkl.tar.bz2#349aef876b1d8c9dccae01de20d5b385 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.3-haf2f30d_0.conda#f3df87cc9ef0b5113bff55aefcbcafd5 
+https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_mkl.tar.bz2#85f61af03fd291dae33150ffe89dc09a +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py311hb755f60_5.conda#e4d262cc3600e70b505a6761d29f6207 +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.21.0-hb942446_5.conda#07d92ed5403ad7b5c66ffd7d5b8f7e57 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.3-h9ad1361_0.conda#8fb0e954c616bb0f9389efac4b4ed44b https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_mkl.tar.bz2#361bf757b95488de76c4f123805742d3 -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.5-default_h2e3cab8_0.tar.bz2#bb1c595d445929e240a806bff0e67d9c https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.tar.bz2#a2f166748917d6d6e4707841ca1f519e -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py311h9461556_3.tar.bz2#03ff0e369f200145f55f94a7a5be1cc4 -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-16.1-h4a94279_0.tar.bz2#7a499b94463000c83e349fffb6ce2631 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/brotlipy-0.7.0-py311hd4cff14_1005.tar.bz2#9bdac7084ecfc08338bae1b976535724 -https://conda.anaconda.org/conda-forge/linux-64/cryptography-38.0.3-py311hb3c386c_0.tar.bz2#7b17c8a122926b634b803567ac32872d -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.21.2-hd4edc92_0.conda#3ae425efddb9da5fb35edda331e4dff7 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_mkl.tar.bz2#44ccc4d4dca6a8d57fa17442bc64b5a1 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.23.5-py311h7d28db0_0.conda#de8cf17747d9efed488cafea2c39c9a1 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.5-py311ha362b79_0.conda#f6dd6ba47e2380b9c715fc45f0d45e62 -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-16_linux64_mkl.tar.bz2#3f92c1c9e1c0e183462c5071aa02cae1 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.6-py311h4dd048b_0.tar.bz2#d97ffb1b2692d8846d3fc1f20766eb08 -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.21.2-h3e40eee_0.conda#52cbed7e92713cf01b76445530396695 -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.5.2-py311h8b32b4d_0.conda#d203d6938a0c1a76cb540a2972644af7 -https://conda.anaconda.org/conda-forge/noarch/pyopenssl-22.1.0-pyhd8ed1ab_0.tar.bz2#fbfa0a180d48c800f922a10a114a8632 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py311ha362b79_2.tar.bz2#d250de3c3013c210865cc033164d6b60 
-https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.0.0-pyhd8ed1ab_0.tar.bz2#c9e3f8bfdb9bfc34aa1836a6ed4b25d7 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.9.3-py311h69910c8_2.tar.bz2#bb44baf80c9e22d4581dea2c030adb1c -https://conda.anaconda.org/conda-forge/linux-64/blas-2.116-mkl.tar.bz2#c196a26abf6b4f132c88828ab7c2231c -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.6.2-py311he728205_0.tar.bz2#96ec1bd38ecfc5ead0ac1eb8c4bf35ff -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py311h59ea3da_2.tar.bz2#4521a31493dbc02ffee57c524967b847 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.6-h7acdfc8_2.conda#7ec7d259b6d725ca952d40e2355e192c -https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py311h3408d8f_2.tar.bz2#5bf133633260e9d8d3f9a50ef78b49b2 -https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_1.tar.bz2#089382ee0e2dc2eae33a04cc3c2bddb0 -https://conda.anaconda.org/conda-forge/noarch/codecov-2.1.12-pyhd8ed1ab_0.conda#0317ed52e504b93da000e8a027628775 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.6.2-py311h38be061_0.tar.bz2#190a1bc60c0f7053daad403fa745fef3 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b +https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.10.57-h85b1a90_19.conda#0605d3d60857fc07bd6a11e878fe0f08 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py311h64a7726_0.conda#a502d7aad449a1206efb366d6a12c52d +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/noarch/array-api-strict-1.1.1-pyhd8ed1ab_0.conda#941bbcd64d1a7b44aeb497f468fc85b4 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py311h9547e67_0.conda#74ad0ae64f1ef565e27eda87fa749e84 +https://conda.anaconda.org/conda-forge/linux-64/libarrow-12.0.1-hb87d912_8_cpu.conda#3f3b11398fe79b578e3c44dd00a44e4a +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py311h320fe9a_0.conda#c79e96ece4110fdaf2657c9f8e16f749 +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.26-py311h00856b1_0.conda#d9002441c9b75b188f9cdc51bf4f22c7 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py311hf0fb5b6_5.conda#ec7e45bc76d9d0b69a74a2075932b8e8 +https://conda.anaconda.org/conda-forge/linux-64/pytorch-1.13.1-cpu_py311h410fd25_1.conda#ddd2fadddf89e3dc3d541a2537fce010 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.0-py311h517d4fd_1.conda#a86b8bea39e292a23b2cf9a750f49ea1 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py311h54ef318_0.conda#150186110f111b458f86c04361351337 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py311h92ebd52_0.conda#2d415a805458e93fcf5551760fd2d287 +https://conda.anaconda.org/conda-forge/linux-64/pyarrow-12.0.1-py311h39c9aba_8_cpu.conda#587370a25bb2c50cce90909ce20d38b8 +https://conda.anaconda.org/conda-forge/linux-64/pytorch-cpu-1.13.1-cpu_py311hdb170b5_1.conda#a805d5f103e493f207613283d8acbbe1 
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py311h38be061_0.conda#fd6fc4385d0eb6b00c46c4c0d28f5c48 diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml index c6d6d70681063..30686a983ab35 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml @@ -14,10 +14,18 @@ dependencies: - matplotlib - pandas - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow - - codecov + - pip + - ninja + - meson-python - pytest-cov - coverage - ccache + - pytorch=1.13 + - pytorch-cpu + - polars + - pyarrow + - array-api-compat + - array-api-strict diff --git a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml deleted file mode 100644 index 24f8b92423f4b..0000000000000 --- a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_environment.yml +++ /dev/null @@ -1,20 +0,0 @@ -# DO NOT EDIT: this file is generated from the specification found in the -# following script to centralize the configuration for CI builds: -# build_tools/update_environments_and_lock_files.py -channels: - - conda-forge -dependencies: - - python - - numpy - - blas[build=mkl] - - scipy - - cython - - joblib - - threadpoolctl - - matplotlib - - pandas - - pyamg - - pytest - - pytest-xdist=2.5.0 - - pillow - - ccache diff --git a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock deleted file mode 100644 index 86625a3a2f4ce..0000000000000 --- a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock +++ /dev/null @@ -1,158 +0,0 @@ -# Generated by conda-lock. 
-# platform: linux-64 -# input_hash: 23f21da087e988398169e2695d60ff854f13d5f56de5b588162ff77b8eb7a4bb -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.9.24-ha878542_0.tar.bz2#41e4e87062433e283696cf384f952ef6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.39-hcc3a1bd_1.conda#737be0d34c22d24432049ab7a3214de4 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-12.2.0-h337968e_19.tar.bz2#164b4b1acaedc47ee7e658ae6b308ca3 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-12.2.0-h46fd767_19.tar.bz2#1030b1f38c129f2634eae026f704fe60 -https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2022.1.0-h84fe81f_915.tar.bz2#2dcd1acca05c11410d4494d7fc7dfa2a -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.11-3_cp311.conda#c2e2630ddb68cf52eec74dc7dfab20b5 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2022f-h191b570_0.tar.bz2#e366350e2343a798e29833286abe2560 -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-12.2.0-h69a702a_19.tar.bz2#cd7a806282c16e1f2d39a7e80d3a3e0d -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-12.2.0-h65d4601_19.tar.bz2#e4c94f80aef025c17ab0828cd85ef535 -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f -https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-h27087fc_0.tar.bz2#c4fbad8d4bddeb3c085f18cbf97fbfad -https://conda.anaconda.org/conda-forge/linux-64/fftw-3.3.10-nompi_hf0379b8_105.tar.bz2#9d3e01547ba04a57372beee01158096f -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-orc-0.4.33-h166bdaf_0.tar.bz2#879c93426c9d0b84a9de4513fbce5f4f -https://conda.anaconda.org/conda-forge/linux-64/icu-70.1-h27087fc_0.tar.bz2#87473a15119779e021c314249d4b4aed -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 -https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f 
-https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdb-6.2.32-h9c3ff4c_0.tar.bz2#3f3258d8f841fbac63b36b75bdac1afd -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.14-h166bdaf_0.tar.bz2#fc84a0446e4e4fb882e78d786cfb9734 -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 -https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libtool-2.4.6-h9c3ff4c_1008.tar.bz2#16e143a1ed4b4fd169536373957f6fee -https://conda.anaconda.org/conda-forge/linux-64/libudev1-252-h166bdaf_0.tar.bz2#174243089ec111479298a5b7099b64b5 -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.32.1-h7f98852_1000.tar.bz2#772d69f030955d9646d3d0eaf21d859d -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.4-h166bdaf_0.tar.bz2#ac2ccf7323d21f2994e4d1f5da664f37 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.3-h9c3ff4c_1.tar.bz2#fbe97e8fa6f275d7c76a09e795adc3e6 -https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.30.2-h27087fc_1.tar.bz2#2fe2a839394ef3a1825a5e5e296060bc -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.3-h27087fc_1.tar.bz2#4acfc691e64342b9dae57cf2adc63238 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.32-h9c3ff4c_1.tar.bz2#29ded371806431b0499aaee146abfc3e -https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1s-h166bdaf_0.tar.bz2#e17553617ce05787d97715177be014d1 -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.7.0-h924138e_0.tar.bz2#819421f81b127a5547bf96ad57eccdd9 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.9-h7f98852_0.tar.bz2#bf6f803a544f26ebbdc3bfff272eb179 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.66-ha37c62d_0.tar.bz2#2d7665abd0997f1a6d4b7596bc27b657 -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.10-h9b69904_4.tar.bz2#390026683aef81db27ff1b8570ca1336 -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 
-https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.45-hc0c96e0_0.tar.bz2#839aeb24ab885a7b902247a6d943d02f -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.40.0-h753d276_0.tar.bz2#2e5f9a37d487e1019fd4d8113adb2f9f -https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.10.3-h7463322_0.tar.bz2#3b933ea47ef8f330c4c068af25fcd6a8 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-15.0.5-he0ac6c6_0.tar.bz2#5c4783b468153a1d8f33874c5bb55864 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.31-haf5c9bc_0.tar.bz2#0249d755f8d26cb2ac796f9f01cfb823 -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b -https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h6239696_4.tar.bz2#adcf0be7897e73e312bd24353b613f74 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.7.3-h2599c5e_0.tar.bz2#4feea9466084c6948bd59539f1c0bb72 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_0.tar.bz2#4e54cbfc47b8c74c2ecc1e7730d8edce -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.19.3-h3790be6_0.tar.bz2#7d862b05445123144bec92cb1acc8ef8 -https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.74.1-h606061b_1.tar.bz2#ed5349aa96776e00b34eccecf4a948fe -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.5-h63197d8_0.tar.bz2#339faf1a5e13c0d4abab84405847ad13 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.1.0-h27087fc_0.tar.bz2#02fa0b56a57c8421d1195bf0c021e682 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h55922b4_4.tar.bz2#901791f0ec7cddc8714e76e273013a91 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.0.3-he3ba5ed_0.tar.bz2#f9dbabc7e01c459ed7a1d1d64b206e9b -https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.1.0-h84fe81f_915.tar.bz2#b9c8f925797a93dbff45e1626b025a6b -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.31-h28c427c_0.tar.bz2#455d44a05123f30f66af2ca2a9652b5f -https://conda.anaconda.org/conda-forge/linux-64/python-3.11.0-h582c2e5_0_cpython.tar.bz2#ac6e08a5519c81473b4f962660d36608 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.40.0-h4ff8645_0.tar.bz2#bb11803129cbbb53ed56f9506ff74145 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-h166bdaf_0.tar.bz2#384e7fcb3cd162ba3e4aed4b687df566 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h166bdaf_0.tar.bz2#637054603bb7594302e3bf83f0a99879 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-h166bdaf_0.tar.bz2#732e22f1741bccea861f5668cf7342a7 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h166bdaf_0.tar.bz2#0a8e20a8aef954390b9481a527421a8c 
-https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.32-py311ha362b79_1.tar.bz2#b24f3bc51bda5364df92f39b9256a2a6 -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.1-hc2a2eb6_0.tar.bz2#78415f0180a8d9c5bcc47889e00d5fb1 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.74.1-h6239696_1.tar.bz2#5f442e6bc9d89ba236eb25a25c5c2815 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/linux-64/jack-1.9.21-he978b8e_1.tar.bz2#5cef21ebd70a90a0d28127543a8d3739 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py311h4dd048b_1.tar.bz2#46d451f575392c01dc193069bd89766d -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.14-h6ed2654_0.tar.bz2#dcc588839de1445d90995a0a2c4f3a39 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_mkl.tar.bz2#85f61af03fd291dae33150ffe89dc09a -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.5-default_h3a83d3e_0.tar.bz2#ae4ab2853ffd9165ac91e91f64e4539d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h3e49a29_2.tar.bz2#3b88f1d0fe2580594d58d7e44d664617 -https://conda.anaconda.org/conda-forge/linux-64/libpq-14.5-hd77ab85_1.tar.bz2#f5c8135a70758d928a8126998a6558d8 -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-252-h2a991cd_0.tar.bz2#3c5ae9f61f663b3d5e1bf7f7da0c85f5 -https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2022.1.0-ha770c72_916.tar.bz2#69ba49e445f87aea2cba343a71a35ca2 -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.78-h2350873_0.tar.bz2#ab3df39f96742e6f1a9878b09274c1dc -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h7d73246_1.tar.bz2#a11b4df9271a8d7917686725aa04c8f2 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pytz-2022.6-pyhd8ed1ab_0.tar.bz2#b1f26ad83328e486910ef7f6e81dc061 -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 
-https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py311hd4cff14_1.tar.bz2#4d86cd6dbdc1185f4e72d974f1f1f852 -https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h166bdaf_0.tar.bz2#c9b568bd804cb2903c6be6f5f68182e4 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.38.0-py311hd4cff14_1.tar.bz2#871b97970cf7420780f79a62fef8eb48 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.74.1-h6239696_1.tar.bz2#f3220a9e9d3abcbfca43419a219df7e4 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_mkl.tar.bz2#361bf757b95488de76c4f123805742d3 -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.5-default_h2e3cab8_0.tar.bz2#bb1c595d445929e240a806bff0e67d9c -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.tar.bz2#a2f166748917d6d6e4707841ca1f519e -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py311h9461556_3.tar.bz2#03ff0e369f200145f55f94a7a5be1cc4 -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-16.1-h4a94279_0.tar.bz2#7a499b94463000c83e349fffb6ce2631 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.21.2-hd4edc92_0.conda#3ae425efddb9da5fb35edda331e4dff7 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_mkl.tar.bz2#44ccc4d4dca6a8d57fa17442bc64b5a1 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.23.5-py311h7d28db0_0.conda#de8cf17747d9efed488cafea2c39c9a1 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.5-py311ha362b79_0.conda#f6dd6ba47e2380b9c715fc45f0d45e62 -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-16_linux64_mkl.tar.bz2#3f92c1c9e1c0e183462c5071aa02cae1 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.6-py311h4dd048b_0.tar.bz2#d97ffb1b2692d8846d3fc1f20766eb08 -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.21.2-h3e40eee_0.conda#52cbed7e92713cf01b76445530396695 -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.5.2-py311h8b32b4d_0.conda#d203d6938a0c1a76cb540a2972644af7 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py311ha362b79_2.tar.bz2#d250de3c3013c210865cc033164d6b60 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.9.3-py311h69910c8_2.tar.bz2#bb44baf80c9e22d4581dea2c030adb1c -https://conda.anaconda.org/conda-forge/linux-64/blas-2.116-mkl.tar.bz2#c196a26abf6b4f132c88828ab7c2231c -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.6.2-py311he728205_0.tar.bz2#96ec1bd38ecfc5ead0ac1eb8c4bf35ff 
-https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py311h59ea3da_2.tar.bz2#4521a31493dbc02ffee57c524967b847 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.6-h7acdfc8_2.conda#7ec7d259b6d725ca952d40e2355e192c -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py311h3408d8f_2.tar.bz2#5bf133633260e9d8d3f9a50ef78b49b2 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.6.2-py311h38be061_0.tar.bz2#190a1bc60c0f7053daad403fa745fef3 diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock index cf7dba375a6a2..c0e54faa37bc6 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock @@ -1,130 +1,129 @@ # Generated by conda-lock. # platform: osx-64 -# input_hash: 71e12e5567c1774957288c7db48fdb8c9ad13a8d69bf8e9bb6790429d0b35dcc +# input_hash: 05036df523e23d48cff7b6355ca081c5e5b41d8c5078cb9e1352f79e661d0549 @EXPLICIT -https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h0d85af4_4.tar.bz2#37edc4e6304ca87316e160f5ca0bd1b5 -https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2022.9.24-h033912b_0.tar.bz2#67b268c32433047914482def1ce215c2 -https://conda.anaconda.org/conda-forge/osx-64/jpeg-9e-hac89ed1_2.tar.bz2#60d90a3f5803660c5c2a2e9d883df0a6 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.0.9-hb7f2c08_8.tar.bz2#37157d273eaf3bc7d6862104161d9ec9 -https://conda.anaconda.org/conda-forge/osx-64/libcxx-14.0.6-hccf4f1f_0.tar.bz2#208a6a874b073277374de48a782f6b10 -https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.14-hb7f2c08_0.tar.bz2#ce2a6075114c9b64ad8cace52492feee +https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h10d778d_5.conda#6097a6ca9ada32699b5fc4312dd6ef18 +https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2024.2.2-h8857fd0_0.conda#f2eacee8c33c43692f1ccfd33d0f50b1 +https://conda.anaconda.org/conda-forge/osx-64/icu-73.2-hf5e326d_0.conda#5cc301d759ec03f28328428e28f65591 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.1.0-h0dc2134_1.conda#9e6c31441c9aa24e41ace40d6151aab6 +https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.20-h49d49c5_0.conda#d46104f6a896a0bc6a1d37b88b2edf5c +https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.6.2-h73e2aa4_0.conda#3d1d51c8f716d97c864d12f7af329526 https://conda.anaconda.org/conda-forge/osx-64/libffi-3.4.2-h0d85af4_5.tar.bz2#ccb34fb14960ad8b125962d3d79b31a9 -https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-11.3.0-h824d247_26.tar.bz2#815db11aee25eff0dbb5f91e0cbac6cf -https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hac89ed1_0.tar.bz2#691d103d11180486154af49c037b7ed9 -https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.2.4-h775f41a_0.tar.bz2#28807bef802a354f9c164e7ab242c5cb -https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.2.13-hfd90126_4.tar.bz2#35eb3fce8d51ed3c1fd4122bad48250b -https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-15.0.5-h61d9ccf_0.tar.bz2#81ceb8ca1476f31cbaacf7ac845b6fff -https://conda.anaconda.org/conda-forge/osx-64/mkl-include-2022.1.0-h6bab518_928.tar.bz2#67f8511a5eaf693a202486f74035b3f7 -https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.3-h96cf925_1.tar.bz2#76217ebfbb163ff2770a261f955a5861 
+https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-12.3.0-h0b6f5ec_3.conda#39eeea5454333825d72202fae2d5e0b8 +https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hd75f5a5_2.conda#6c3628d047e151efba7cf08c5e54d1ca +https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-3.0.0-h0dc2134_1.conda#72507f8e3961bc968af17435060b6dd6 +https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.4.0-h10d778d_0.conda#b2c0047ea73819d992484faacbbe1c24 +https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.2.13-h8a1eda9_5.conda#4a3ad23f6e16f99c04e166767193d700 +https://conda.anaconda.org/conda-forge/osx-64/mkl-include-2023.2.0-h6bab518_50500.conda#835abb8ded5e26f23ea6996259c7972e +https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.5-h5846eda_0.conda#02a888433d165c99bf09784a7b14d900 https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-hc929b4f_1001.tar.bz2#addd19059de62181cd11ae8f4ef26084 -https://conda.anaconda.org/conda-forge/osx-64/python_abi-3.11-3_cp311.conda#5e0a069a585445333868d2c6651c3b3f -https://conda.anaconda.org/conda-forge/noarch/tzdata-2022f-h191b570_0.tar.bz2#e366350e2343a798e29833286abe2560 -https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.9-h35c211d_0.tar.bz2#c5049997b2e98edfbcdd294582f66281 +https://conda.anaconda.org/conda-forge/osx-64/python_abi-3.12-4_cp312.conda#87201ac4314b911b74197e588cca3639 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.11-h0dc2134_0.conda#9566b4c29274125b0266d0177b5eb97b https://conda.anaconda.org/conda-forge/osx-64/xorg-libxdmcp-1.1.3-h35c211d_0.tar.bz2#86ac76d6bf1cbb9621943eb3bd9ae36e https://conda.anaconda.org/conda-forge/osx-64/xz-5.2.6-h775f41a_0.tar.bz2#a72f9d4ea13d55d745ff1ed594747f10 -https://conda.anaconda.org/conda-forge/osx-64/gmp-6.2.1-h2e338ed_0.tar.bz2#dedc96914428dae572a39e69ee2a392f -https://conda.anaconda.org/conda-forge/osx-64/isl-0.25-hb486fe8_0.tar.bz2#45a9a46c78c0ea5c275b535f7923bde3 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h0dc2134_1.conda#9ee0bab91b2ca579e10353738be36063 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h0dc2134_1.conda#8a421fe09c6187f0eb5e2338a8a8be6d +https://conda.anaconda.org/conda-forge/osx-64/libcxx-17.0.6-h88467a6_0.conda#0fe355aecb8d24b8bc07c763209adbd9 +https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.43-h92b6c6a_0.conda#65dcddb15965c9de2c0365cb14910532 +https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.45.3-h92b6c6a_0.conda#68e462226209f35182ef66eda0f794ff +https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.15-hb7f2c08_0.conda#5513f57e0238c87c12dffedbcc9c1a4a +https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.12.7-h3e169fe_0.conda#4c04ba47fdd2ebecc1d3b6a77534d9ef +https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-18.1.5-h39e0ece_0.conda#ee12a644568269838b91f901b2537425 +https://conda.anaconda.org/conda-forge/osx-64/openssl-3.3.0-hd75f5a5_0.conda#eb8c33aa7929a7714eab8b90c1d88afe +https://conda.anaconda.org/conda-forge/osx-64/readline-8.2-h9e318b2_1.conda#f17f77f2acf4d344734bda76829ce14e +https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.13-h1abcd95_1.conda#bf830ba5afc507c6232d4ef0fb1a882d +https://conda.anaconda.org/conda-forge/osx-64/zlib-1.2.13-h8a1eda9_5.conda#75a8a98b1c4671c5d2897975731da42d +https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.6-h915ae27_0.conda#4cb2cd56f039b129bb0e491c1164167e 
+https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h0dc2134_1.conda#ece565c215adcc47fc1db4e651ee094b +https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h60636b9_2.conda#25152fce119320c980e5470e64834b50 +https://conda.anaconda.org/conda-forge/osx-64/gmp-6.3.0-h73e2aa4_1.conda#92f8d748d95d97f92fc26cfac9bb5b6e +https://conda.anaconda.org/conda-forge/osx-64/isl-0.26-imath32_h2e86a7b_101.conda#d06222822a9144918333346f145b68c6 https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2#f9d6a4c82889d5ecedec1d90eb673c55 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.0.9-hb7f2c08_8.tar.bz2#7f952a036d9014b4dab96c6ea0f8c2a7 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.0.9-hb7f2c08_8.tar.bz2#b36a3bfe866d9127f25f286506982166 -https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-11.3.0-h082f757_26.tar.bz2#11835360754e5caca43cfaa3a81dfca5 -https://conda.anaconda.org/conda-forge/osx-64/libllvm14-14.0.6-h5b596cc_1.tar.bz2#c61f692b0e98efc1ef772fdf7d14e81a -https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.39-ha978bb4_0.conda#35e4928794c5391aec14ffdf1deaaee5 -https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.40.0-ha978bb4_0.tar.bz2#ceb13b6726534b96e3b4e3dda91e9050 -https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.13-h0d85af4_1004.tar.bz2#eb7860935e14aec936065cbc21a1a962 -https://conda.anaconda.org/conda-forge/osx-64/openssl-3.0.7-hfd90126_0.tar.bz2#78d8266753a5db378ef0f9302be9990f -https://conda.anaconda.org/conda-forge/osx-64/readline-8.1.2-h3899abd_0.tar.bz2#89fa404901fa8fb7d4f4e07083b8d635 -https://conda.anaconda.org/conda-forge/osx-64/tapi-1100.0.11-h9ce4665_0.tar.bz2#f9ff42ccf809a21ba6f8607f8de36108 -https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.7.0-hb8565cd_0.tar.bz2#41dae453624c0b84c5921ad2efd45983 -https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.12-h5dbffcc_0.tar.bz2#8e9480d9c47061db2ed1b4ecce519a7f -https://conda.anaconda.org/conda-forge/osx-64/zlib-1.2.13-hfd90126_4.tar.bz2#be90e6223c74ea253080abae19b3bdb1 -https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.2-hfa58983_4.tar.bz2#0b446e84f3ccf085e590dc1f73eebe3f -https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.0.9-hb7f2c08_8.tar.bz2#aac5ad0d8f747ef7f871508146df75d9 -https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h3f81eb7_0.tar.bz2#6afb5b1664496c575117efe9aa2c9ba9 -https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp14-14.0.6-default_h55ffa42_0.tar.bz2#9b9bc2f878d47e6846e3d01ca0fcb921 -https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-9_5_0_h97931a8_26.tar.bz2#ac9c1a84323edab6c3ff9d3e586ab3cc -https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.4.0-hdb44e8a_4.tar.bz2#09195c43a896fe98b82dcebfa1d6eab1 -https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-14.0.6-h5b596cc_1.tar.bz2#d99491efd3d672b3496e9fc9273da7c0 -https://conda.anaconda.org/conda-forge/osx-64/mkl-2022.1.0-h860c996_928.tar.bz2#98a4d58de0ba6e61ce46620b775c19ce -https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.1.0-h0f52abe_1.tar.bz2#afe26b08c2d2265b4d663d199000e5da -https://conda.anaconda.org/conda-forge/osx-64/python-3.11.0-h559f36b_0_cpython.tar.bz2#9eac7bb07be3725945c23c4ae90f9faa +https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-13.2.0-h2873a65_3.conda#e4fb4d23ec2870ff3c40d10afe305aec +https://conda.anaconda.org/conda-forge/osx-64/libhwloc-2.10.0-default_h1321489_1000.conda#6f5fe4374d1003e116e2573022178da6 
+https://conda.anaconda.org/conda-forge/osx-64/libllvm16-16.0.6-hbedff68_3.conda#8fd56c0adc07a37f93bd44aa61a97c90 +https://conda.anaconda.org/conda-forge/osx-64/ninja-1.12.1-h3c5361c_0.conda#a0ebabd021c8191aeb82793fe43cfdcb +https://conda.anaconda.org/conda-forge/osx-64/python-3.12.3-h1411813_0_cpython.conda#df1448ec6cbf8eceb03d29003cf72ae6 https://conda.anaconda.org/conda-forge/osx-64/sigtool-0.1.3-h88f4db0_0.tar.bz2#fbfb84b9de9a6939cb165c02c69b1865 -https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/osx-64/brotli-1.0.9-hb7f2c08_8.tar.bz2#55f612fe4a9b5f6ac76348b6de94aaeb -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-2.1.1-pyhd8ed1ab_0.tar.bz2#c1d5b294fbf9a795dec349a6f4d8be8e -https://conda.anaconda.org/conda-forge/osx-64/clang-14-14.0.6-default_h55ffa42_0.tar.bz2#f4b08faae104f8a5483c06f7c6464b35 +https://conda.anaconda.org/conda-forge/osx-64/tapi-1100.0.11-h9ce4665_0.tar.bz2#f9ff42ccf809a21ba6f8607f8de36108 +https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h0dc2134_1.conda#9272dd3b19c4e8212f8542cefd5c3d67 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/osx-64/cython-0.29.32-py311h814d153_1.tar.bz2#d470cb2ffe557d78c7fa324ff39b66cb -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.4-py311hd2070f0_1.tar.bz2#5219e72a43e53e8f6af4fdf76a0f90ef -https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.14-h90f4b2a_0.tar.bz2#e56c432e9a78c63692fa6bd076a15713 -https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-609-hfd63004_11.conda#8881d41cb8fa1104d4545c6b7ddc9671 -https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-16_osx64_mkl.tar.bz2#96b23c2ca3208c5cb1ed34270448af5c -https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz2#524282b2c46c9dedf051b3bc2ae05494 -https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2022.1.0-h694c41f_929.tar.bz2#041ceef009fe6d29cbd2555907c23ab3 -https://conda.anaconda.org/conda-forge/osx-64/mpc-1.2.1-hbb51d92_0.tar.bz2#9f46d6ad4c460679ee997abc10da3bac +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/osx-64/cython-3.0.10-py312hede676d_0.conda#3008aa88f0dc67e7144734b16e331ee4 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.5-py312h49ebfd2_1.conda#21f174a5cfb5964069c374171a979157 +https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-711-ha20a434_0.conda#a8b41eb97c8a9d618243a79ba78fdc3c +https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp16-16.0.6-default_h7151d67_6.conda#7eaad118ab797d1427f8745c861d1925 +https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-13_2_0_h97931a8_3.conda#0b6e23a012ee7a9a5f6b244f5a92c1d5 +https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.6.0-h129831d_3.conda#568593071d2e6cea7b5fc1f75bfa10ca +https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-16.0.6-hbedff68_3.conda#e9356b0807462e8f84c1384a8da539a5 +https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.2.1-h4f6b447_1.conda#b90df08f0deb2f58631447c1462c92a7 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.0-h5d0d7b0_1.tar.bz2#be533cc782981a0ec5eed28aa618470a -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pycparser-2.21-pyhd8ed1ab_0.tar.bz2#076becd9e05608f8dc72757d5f3a91ff -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/pytz-2022.6-pyhd8ed1ab_0.tar.bz2#b1f26ad83328e486910ef7f6e81dc061 -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.12.0-h7728843_0.conda#e4fb6f4700d8890c36cbf317c2c6d0cb +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/osx-64/tornado-6.2-py311h5547dcb_1.tar.bz2#bc9918caedfa2de9e582104bf605d57d 
-https://conda.anaconda.org/conda-forge/osx-64/ccache-4.7.3-h2822714_0.tar.bz2#a119676fd25b0268da665107f7176ec6 -https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-973.0.1-hcc6d90d_11.conda#f1af817221bc31e7c770e1ea15374355 -https://conda.anaconda.org/conda-forge/osx-64/cffi-1.15.1-py311ha86e640_2.tar.bz2#6b2c5fa2e823356561717fc8b8ce3433 -https://conda.anaconda.org/conda-forge/osx-64/clang-14.0.6-h694c41f_0.tar.bz2#77667c3c75b88f12782f628d171ffeda -https://conda.anaconda.org/conda-forge/osx-64/coverage-6.5.0-py311h5547dcb_1.tar.bz2#5adc116748636d56a17e9068081db5ca -https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.38.0-py311h5547dcb_1.tar.bz2#6fc564da4dd28e360f4cfee7bee95cf9 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-11.3.0-h1f927f5_26.tar.bz2#f1b788b41dc5171493563686023a165c -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/osx-64/ld64-609-hc6ad406_11.conda#9e14075f26a915bc6180b40789138adf -https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-16_osx64_mkl.tar.bz2#430c4d18fd8bbc987c4367f5d16135cf -https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-16_osx64_mkl.tar.bz2#757f1ae46973ce6542784d99b9984d8d -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/osx-64/pillow-9.2.0-py311he7df5c9_3.tar.bz2#98a9590d51ca20ae722ae5f850ddc6ca -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/osx-64/brotlipy-0.7.0-py311h5547dcb_1005.tar.bz2#5f97ac938a90d06eebea42c321abe0d7 -https://conda.anaconda.org/conda-forge/osx-64/cctools-973.0.1-h76f1dac_11.conda#77d8192c013d7a4a355aee5b0ae1ae20 -https://conda.anaconda.org/conda-forge/osx-64/clangxx-14.0.6-default_h55ffa42_0.tar.bz2#6a46064b0506895d090302433e70397b -https://conda.anaconda.org/conda-forge/osx-64/cryptography-38.0.3-py311h61927ef_0.tar.bz2#dbbef5733e57a4e785057125017340b5 -https://conda.anaconda.org/conda-forge/osx-64/liblapacke-3.9.0-16_osx64_mkl.tar.bz2#ba52eebcca282a5abaa3d3ac79cf2b05 -https://conda.anaconda.org/conda-forge/osx-64/numpy-1.23.5-py311h62c7003_0.conda#e8c8aa5d60b4d22153c1f0fdb8b1bb22 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-16_osx64_mkl.tar.bz2#2fb6331f94446754c896d1f11d3afa1c -https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-14.0.6-hab78ec2_0.tar.bz2#4fdde3f4ed31722a1c811723f5db82f0 -https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.0.6-py311hd2070f0_0.tar.bz2#7aff06dca8dc89b96ba3b8caeb6dc2c9 -https://conda.anaconda.org/conda-forge/osx-64/pandas-1.5.2-py311hd84f3f5_0.conda#c061bfc7a65e7b7a1757d2476056acc3 -https://conda.anaconda.org/conda-forge/noarch/pyopenssl-22.1.0-pyhd8ed1ab_0.tar.bz2#fbfa0a180d48c800f922a10a114a8632 -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.0.0-pyhd8ed1ab_0.tar.bz2#c9e3f8bfdb9bfc34aa1836a6ed4b25d7 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/osx-64/scipy-1.9.3-py311h939689b_2.tar.bz2#ad8a377dabefbd942989ff55e3c97e16 -https://conda.anaconda.org/conda-forge/osx-64/blas-2.116-mkl.tar.bz2#bcaf774ad76aa575f4b60c585c2a8dab 
-https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-14.0.6-h613da45_0.tar.bz2#b44e0625319f9933e584dc3b96f5baf7 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.6.2-py311h2bf763f_0.tar.bz2#23cef32adc676da209c6c4874f29523f -https://conda.anaconda.org/conda-forge/osx-64/pyamg-4.2.3-py311h349b758_2.tar.bz2#59bc03179823f04c8647df161695e8cc -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 -https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-14.0.6-h3113cd8_4.conda#e1828ef1597292a9ea25627fdfacb9f3 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.6.2-py311h6eed73b_0.tar.bz2#b3db01070d46627acacf2d9d582b4643 -https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_1.tar.bz2#089382ee0e2dc2eae33a04cc3c2bddb0 -https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.5.1-hbf74d83_0.tar.bz2#674d19e83a1d0e9abfb2c9875c5457c5 -https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-14.0.6-h6f97653_4.conda#f9f2cc37068e5f2f4332793640329fe3 -https://conda.anaconda.org/conda-forge/noarch/codecov-2.1.12-pyhd8ed1ab_0.conda#0317ed52e504b93da000e8a027628775 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-11.3.0-h18f7dce_0.tar.bz2#72320d23ed499315d1d1ac332b94bc66 -https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.5.1-hb8565cd_0.tar.bz2#6389aafc7083db9c452aa6038abef6cc -https://conda.anaconda.org/conda-forge/osx-64/gfortran-11.3.0-h2c809b3_0.tar.bz2#db5338d1fb1ad08498bdc1b42277a0d5 -https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.5.1-haad3a49_0.tar.bz2#6cad466ef506a8100204658e072da710 -https://conda.anaconda.org/conda-forge/osx-64/compilers-1.5.1-h694c41f_0.tar.bz2#98ef60b72672abd819ae7dfc1fbdd160 +https://conda.anaconda.org/conda-forge/osx-64/tornado-6.4-py312h41838bb_0.conda#2d2d1fde5800d45cb56218583156d23d +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-986-ha1c5b94_0.conda#a8951de2506df5649f5a3295fdfd9f2c +https://conda.anaconda.org/conda-forge/osx-64/clang-16-16.0.6-default_h7151d67_6.conda#1c298568c30efe7d9369c7c15b748461 +https://conda.anaconda.org/conda-forge/osx-64/coverage-7.5.1-py312h520dd33_0.conda#afc8c7b237683760a3c35e49bcc04deb +https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.51.0-py312h41838bb_0.conda#ebe40134b860cf704ddaf81f684f95a5 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.16-ha2f27b4_0.conda#1442db8f03517834843666c422238c9b +https://conda.anaconda.org/conda-forge/osx-64/ld64-711-ha02d983_0.conda#3ae4930ec076735cce481e906f5192e0 +https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz2#524282b2c46c9dedf051b3bc2ae05494 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/osx-64/mkl-2023.2.0-h54c2260_50500.conda#0a342ccdc79e4fcd359245ac51941e7b +https://conda.anaconda.org/conda-forge/osx-64/mpc-1.3.1-h81bd1dd_0.conda#c752c0eb6c250919559172c011e5f65b +https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.2-h7310d3a_0.conda#05a14cc9d725dd74995927968d6547e3 
+https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/osx-64/ccache-4.9.1-h41adc32_0.conda#45aaf96b67840bd98a928de8679098fa +https://conda.anaconda.org/conda-forge/osx-64/cctools-986-h40f6528_0.conda#b7a2ca0062a6ee8bc4e83ec887bef942 +https://conda.anaconda.org/conda-forge/osx-64/clang-16.0.6-hdae98eb_6.conda#884e7b24306e4f21b7ee08dabadb2ecc +https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-12.3.0-hc328e78_3.conda#b3d751dc7073bbfdfa9d863e39b9685d +https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-20_osx64_mkl.conda#160fdc97a51d66d51dc782fb67d35205 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2023.2.0-h694c41f_50500.conda#1b4d0235ef253a1e19459351badf4f9f +https://conda.anaconda.org/conda-forge/osx-64/pillow-10.3.0-py312h0c923fa_0.conda#6f0591ae972e9b815739da3392fbb3c3 +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/osx-64/clangxx-16.0.6-default_h7151d67_6.conda#cc8c007a529a7cfaa5d29d8599df3fe6 +https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-20_osx64_mkl.conda#51089a4865eb4aec2bc5c7468bd07f9f +https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-20_osx64_mkl.conda#58f08e12ad487fac4a08f90ff0b87aec +https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-16.0.6-ha38d28d_2.conda#7a46507edc35c6c8818db0adaf8d787f +https://conda.anaconda.org/conda-forge/osx-64/liblapacke-3.9.0-20_osx64_mkl.conda#124ae8e384268a8da66f1d64114a1eda +https://conda.anaconda.org/conda-forge/osx-64/numpy-1.26.4-py312he3a82b2_0.conda#96c61a21c4276613748dba069554846b +https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-20_osx64_mkl.conda#cc3260179093918b801e373c6e888e02 +https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-16.0.6-ha38d28d_2.conda#3b9e8c5c63b8e86234f499490acd85c2 +https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.2.1-py312h9230928_0.conda#079df34ce7c71259cfdd394645370891 +https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.2-py312h83c8a23_0.conda#b422a5d39ff0cd72923aef807f280145 +https://conda.anaconda.org/conda-forge/osx-64/scipy-1.13.0-py312h741d2f9_1.conda#c416453a8ea3b38d823fe8dcecdb6a12 +https://conda.anaconda.org/conda-forge/osx-64/blas-2.120-mkl.conda#b041a7677a412f3d925d8208936cb1e2 +https://conda.anaconda.org/conda-forge/osx-64/clang_impl_osx-64-16.0.6-h8787910_14.conda#fc1a7d3f1bf236f63c58bab6e36844cb +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.8.4-py312h1fe5000_0.conda#3e3097734a5042cb6d2675e69bf1fc5a +https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.1.0-py312h3db3e91_0.conda#c6d6248b99fc11b15c9becea581a1462 +https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-16.0.6-hb91bd55_14.conda#3d0d9c725912bb0cb4cd301d2a5d31d7 
+https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.8.4-py312hb401068_0.conda#187ee42addd449b4899b55c304012436 +https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.7.0-h282daa2_1.conda#d27411cb82bc1b76b9f487da6ae97f1d +https://conda.anaconda.org/conda-forge/osx-64/clangxx_impl_osx-64-16.0.6-h6d92fbe_14.conda#66b9f06d5f0d0ea47ffcb3a9ca65774a +https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-12.3.0-h18f7dce_1.conda#436af2384c47aedb94af78a128e174f1 +https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-16.0.6-hb91bd55_14.conda#a4504c1a7beab8875d6f765941e77248 +https://conda.anaconda.org/conda-forge/osx-64/gfortran-12.3.0-h2c809b3_1.conda#c48adbaa8944234b80ef287c37e329b0 +https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.7.0-h7728843_1.conda#e04cb15a20553b973dd068c2dc81d682 +https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.7.0-h6c2ab21_1.conda#48319058089f492d5059e04494b81ed9 +https://conda.anaconda.org/conda-forge/osx-64/compilers-1.7.0-h694c41f_1.conda#875e9b06186a41d55b96b9c1a52f15be diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml index 5bcc09b32fffa..cfa1b7689a4ad 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml @@ -14,10 +14,12 @@ dependencies: - matplotlib - pandas - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow - - codecov + - pip + - ninja + - meson-python - pytest-cov - coverage - ccache diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml index 93bb7769f4473..01bd378aa121a 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml @@ -5,19 +5,23 @@ channels: - defaults dependencies: - python - - numpy=1.22 + - numpy - blas[build=mkl] - - scipy - - cython + - scipy<1.12 - joblib - - threadpoolctl - matplotlib - pandas - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow - - codecov + - pip + - ninja + - meson-python - pytest-cov - - coverage=6.2 + - coverage - ccache + - pip + - pip: + - cython + - threadpoolctl diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock index ac190e8454e1a..ec92612048448 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock @@ -1,92 +1,86 @@ # Generated by conda-lock. 
# platform: osx-64 -# input_hash: 63f973e661f241c8cb9b0feab317eeb8fa0c7aeec7b48a6c069aedc821b80c44 +# input_hash: e0d2cf2593df1f2c6969d68cf849136bee785b51f6cfc50ea1bdca2143d4a051 @EXPLICIT https://repo.anaconda.com/pkgs/main/osx-64/blas-1.0-mkl.conda#cb2c87e85ac8e0ceae776d26d4214c8a -https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2022.10.11-hecd8cb5_0.conda#47d4ae6c764c72394363ca6daa50e6d0 -https://repo.anaconda.com/pkgs/main/osx-64/fftw-3.3.9-h9ed2024_1.conda#9f854d761737b9a8bf9859779a5bb405 -https://repo.anaconda.com/pkgs/main/osx-64/giflib-5.2.1-haf1e3a3_0.conda#0c36d6800a1a0f0ae244699a09d3f982 -https://repo.anaconda.com/pkgs/main/osx-64/intel-openmp-2021.4.0-hecd8cb5_3538.conda#65e79d0ffef79cbb8ebd3c71e74eb50a -https://repo.anaconda.com/pkgs/main/osx-64/jpeg-9e-hca72f7f_0.conda#99b7d820514a0c07818d58c320ab21fc -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlicommon-1.0.9-hca72f7f_7.conda#6c865b9e76fa2fad0c8ac32aa0f01f75 +https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h6c40b1e_6.conda#96224786021d0765ce05818fa3c59bdb +https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2024.3.11-hecd8cb5_0.conda#a2e29a11940c66baf9942912096fad5f +https://repo.anaconda.com/pkgs/main/osx-64/jpeg-9e-h6c40b1e_1.conda#fc3e61fa41309946c9283fe8737d7f41 +https://repo.anaconda.com/pkgs/main/osx-64/libbrotlicommon-1.0.9-h6c40b1e_8.conda#8e86dfa34b08bc664b19e1499e5465b8 https://repo.anaconda.com/pkgs/main/osx-64/libcxx-14.0.6-h9765a3e_0.conda#387757bb354ae9042370452cd0fb5627 -https://repo.anaconda.com/pkgs/main/osx-64/libdeflate-1.8-h9ed2024_5.conda#584dec4a4ba735d8d7841de1948b23b1 -https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.2.4-hca72f7f_0.conda#4196bca3e5be38659521163af8918460 +https://repo.anaconda.com/pkgs/main/osx-64/libdeflate-1.17-hb664fd8_1.conda#b6116b8db33ea6a5b5287dae70d4a913 +https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.4.4-hecd8cb5_1.conda#eb7f09ada4d95f1a26f483f1009d9286 +https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.3.2-h6c40b1e_0.conda#d8fd9f599dd4e012694e69d119016442 https://repo.anaconda.com/pkgs/main/osx-64/llvm-openmp-14.0.6-h0dcd299_0.conda#b5804d32b87dc61ca94561ade33d5f2d -https://repo.anaconda.com/pkgs/main/osx-64/ncurses-6.3-hca72f7f_3.conda#dba236b91a8c0ef6ddecc56e387e92d2 -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2022f-h04d1e81_0.conda#02f786cfa9e5c45d8439799445287030 -https://repo.anaconda.com/pkgs/main/osx-64/xz-5.2.6-hca72f7f_0.conda#0a0111f0dc09d5652cfe6a905f90985b -https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4dc903c_0.conda#d0202dd912bfb45d3422786531717882 +https://repo.anaconda.com/pkgs/main/osx-64/ncurses-6.4-hcec6c5f_0.conda#0214d1ee980e217fabc695f1e40662aa +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 +https://repo.anaconda.com/pkgs/main/osx-64/xz-5.4.6-h6c40b1e_1.conda#b40d69768d28133d8be1843def4f82f5 +https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4b97444_1.conda#38e35f7c817fac0973034bfce6706ec2 https://repo.anaconda.com/pkgs/main/osx-64/ccache-3.7.9-hf120daa_0.conda#a01515a32e721c51d631283f991bc8ea +https://repo.anaconda.com/pkgs/main/osx-64/expat-2.6.2-hcec6c5f_0.conda#c748234dd7e242784198ab038372cb0c +https://repo.anaconda.com/pkgs/main/osx-64/intel-openmp-2023.1.0-ha357a0b_43548.conda#ba8a89ffe593eb88e4c01334753c40c3 https://repo.anaconda.com/pkgs/main/osx-64/lerc-3.0-he9d5cce_0.conda#aec2c3dbef836849c9260f05be04f3db 
-https://repo.anaconda.com/pkgs/main/osx-64/libbrotlidec-1.0.9-hca72f7f_7.conda#b85983951745cc666d9a1b42894210b2 -https://repo.anaconda.com/pkgs/main/osx-64/libbrotlienc-1.0.9-hca72f7f_7.conda#e306d7a1599202a7c95762443f110832 -https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.3-hb1e8313_2.conda#0c959d444ac65555cb836cdbd3e9a2d9 +https://repo.anaconda.com/pkgs/main/osx-64/libbrotlidec-1.0.9-h6c40b1e_8.conda#6338cd7779e614fc16d835990e627e04 +https://repo.anaconda.com/pkgs/main/osx-64/libbrotlienc-1.0.9-h6c40b1e_8.conda#2af01a7b3fdbed47ebe5c452c34e5c5d https://repo.anaconda.com/pkgs/main/osx-64/libgfortran5-11.3.0-h9dfd629_28.conda#1fa1a27ee100b1918c3021dbfa3895a3 -https://repo.anaconda.com/pkgs/main/osx-64/libpng-1.6.37-ha441bb4_0.conda#d69245a20ec59d8dc534c65308607129 -https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.3-h23ab428_1.conda#dc70fec3978d3189741886cc05fcb145 -https://repo.anaconda.com/pkgs/main/osx-64/mkl-2021.4.0-hecd8cb5_637.conda#07d14ece4a852cefa17c1c156db8134e -https://repo.anaconda.com/pkgs/main/osx-64/openssl-1.1.1s-hca72f7f_0.conda#180ff0f1449f1d62dc91495e5aef2902 +https://repo.anaconda.com/pkgs/main/osx-64/libpng-1.6.39-h6c40b1e_0.conda#a3c824835f53ad27aeb86d2b55e47804 +https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.4-hcec6c5f_1.conda#aee0efbb45220e1985533dbff48551f8 +https://repo.anaconda.com/pkgs/main/osx-64/ninja-base-1.10.2-haf03e11_5.conda#c857c13129710a61395270656905c4a2 +https://repo.anaconda.com/pkgs/main/osx-64/openssl-3.0.13-hca72f7f_1.conda#e526d7e2e79132a11b4746cf305c45b5 https://repo.anaconda.com/pkgs/main/osx-64/readline-8.2-hca72f7f_0.conda#971667436260e523f6f7355fdfa238bf -https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.12-h5d9f67b_0.conda#047f0af5486d19163e37fd7f8ae3d29f -https://repo.anaconda.com/pkgs/main/osx-64/brotli-bin-1.0.9-hca72f7f_7.conda#110bdca1a20710820e61f7fa3047f737 +https://repo.anaconda.com/pkgs/main/osx-64/tbb-2021.8.0-ha357a0b_0.conda#fb48530a3eea681c11dafb95b3387c0f +https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.14-h4d00af3_0.conda#a2c03940c2ae54614301ec82e6a98d75 +https://repo.anaconda.com/pkgs/main/osx-64/brotli-bin-1.0.9-h6c40b1e_8.conda#11053f9c6b8d8a8348d0c33450c23ce9 https://repo.anaconda.com/pkgs/main/osx-64/freetype-2.12.1-hd8bbffd_0.conda#1f276af321375ee7fe8056843044fa76 https://repo.anaconda.com/pkgs/main/osx-64/libgfortran-5.0.0-11_3_0_hecd8cb5_28.conda#2eb13b680803f1064e53873ae0aaafb3 -https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.40.0-h880c91c_0.conda#21b5dd3ef31a6b4daaafb7763170137b -https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.2-hcb37349_0.conda#d3ba225e3bc4285d8efd8cdfd7aa6112 -https://repo.anaconda.com/pkgs/main/osx-64/brotli-1.0.9-hca72f7f_7.conda#68e54d12ec67591deb2ffd70348fb00f -https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.4.0-h2cd0358_2.conda#3ca4a08eea7fd9cd88453d35915693a3 -https://repo.anaconda.com/pkgs/main/osx-64/python-3.9.15-hdfd78df_0.conda#35a0690ca2732a7c34425520c639dfb7 -https://repo.anaconda.com/pkgs/main/osx-64/attrs-22.1.0-py39hecd8cb5_0.conda#d0b7738bb61bd74eedfc833533dd14d4 -https://repo.anaconda.com/pkgs/main/osx-64/certifi-2022.9.24-py39hecd8cb5_0.conda#3f381091a2c319d87532b9932c67cdea -https://repo.anaconda.com/pkgs/main/noarch/charset-normalizer-2.0.4-pyhd3eb1b0_0.conda#e7a441d94234b2b5fafee06e25dbf076 -https://repo.anaconda.com/pkgs/main/osx-64/coverage-6.2-py39hca72f7f_0.conda#55962a70ebebc8de15c4e1d745b20cdd +https://repo.anaconda.com/pkgs/main/osx-64/mkl-2023.1.0-h8e150cf_43560.conda#85d0f3431dd5c6ae44f8725fdd3d3e59 
+https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.45.3-h6c40b1e_0.conda#2edf909b937b3aad48322c9cb2e8f1a0 +https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.5-hc035e20_2.conda#c033bf68c12f8c71fd916f000f3dc118 +https://repo.anaconda.com/pkgs/main/osx-64/brotli-1.0.9-h6c40b1e_8.conda#10f89677a3898d0113dc354adf643df3 +https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.5.1-hcec6c5f_0.conda#e127a800ffd9d300ed7d5e1b026944ec +https://repo.anaconda.com/pkgs/main/osx-64/python-3.12.3-hd58486a_1.conda#cdc61e8f6c2d77b3b263e720048c4b54 +https://repo.anaconda.com/pkgs/main/osx-64/coverage-7.2.2-py312h6c40b1e_0.conda#b6e4b9fba325047c07f3c9211ae91d1c https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab -https://repo.anaconda.com/pkgs/main/osx-64/cython-0.29.32-py39he9d5cce_0.conda#e5d7d7620ab25447bc81dc91af7c57e0 https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 -https://repo.anaconda.com/pkgs/main/osx-64/idna-3.4-py39hecd8cb5_0.conda#60fb473352c9fe43b690d7b0b40cd47b https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 -https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.1.1-py39hecd8cb5_0.conda#8c96155e60c4723afd642a6cee396c26 -https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.2-py39he9d5cce_0.conda#6db2c99a6633b0cbd82faa1a36cd29d7 +https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.4.0-py312hecd8cb5_0.conda#0af12a3a87d9c8051ae6ba2ed2c3882a +https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.4-py312hcec6c5f_0.conda#2ba6561ddd1d05936fe74f5d118ce7dd https://repo.anaconda.com/pkgs/main/osx-64/lcms2-2.12-hf1fd2bf_0.conda#697aba7a3308226df7a93ccfeae16ffa -https://repo.anaconda.com/pkgs/main/osx-64/libwebp-1.2.4-h56c3ce4_0.conda#55aab5176f109c67c355ac018e5f7b4a -https://repo.anaconda.com/pkgs/main/noarch/munkres-1.1.4-py_0.conda#148362ba07f92abab76999a680c80084 -https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.0.0-py39hecd8cb5_1.conda#c5507133514846cc5f54dc4de9ba1563 -https://repo.anaconda.com/pkgs/main/noarch/py-1.11.0-pyhd3eb1b0_0.conda#7205a898ed2abbf6e9b903dff6abe08e -https://repo.anaconda.com/pkgs/main/noarch/pycparser-2.21-pyhd3eb1b0_0.conda#135a72ff2a31150a3a3ff0b1edd41ca9 -https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.0.9-py39hecd8cb5_0.conda#9b77837761d4351f49612991cd32127b -https://repo.anaconda.com/pkgs/main/osx-64/pysocks-1.7.1-py39hecd8cb5_0.conda#4765ca1a39ea5287cbe170734ac83e37 -https://repo.anaconda.com/pkgs/main/osx-64/pytz-2022.1-py39hecd8cb5_0.conda#a4ca27633e16749c7688884f842053c8 +https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py312h6c40b1e_1.conda#b1ef860be9043b35c5e8d9388b858514 +https://repo.anaconda.com/pkgs/main/osx-64/ninja-1.10.2-hecd8cb5_5.conda#a0043b325fb08db82477ae433668e684 +https://repo.anaconda.com/pkgs/main/osx-64/openjpeg-2.4.0-h66ea3da_0.conda#882833bd7befc5e60e6fba9c518c1b79 +https://repo.anaconda.com/pkgs/main/osx-64/packaging-23.2-py312hecd8cb5_0.conda#2b4e331c8f6df5d95a5dd3af37a34d89 +https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.0.0-py312hecd8cb5_1.conda#647fada22f1697691fdee90b52c99bcb +https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.0.9-py312hecd8cb5_0.conda#d85cf2b81c6d9326a57a6418e14db258 +https://repo.anaconda.com/pkgs/main/noarch/python-tzdata-2023.3-pyhd3eb1b0_0.conda#479c037de0186d114b9911158427624e +https://repo.anaconda.com/pkgs/main/osx-64/pytz-2024.1-py312hecd8cb5_0.conda#2b28ec0e0d07f5c0c701f75200b1e8b6 
+https://repo.anaconda.com/pkgs/main/osx-64/setuptools-69.5.1-py312hecd8cb5_0.conda#5c7c7ef1e0762e3ca1f543d28310946f https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 -https://repo.anaconda.com/pkgs/main/noarch/threadpoolctl-2.2.0-pyh0d69192_0.conda#bbfdbae4934150b902f97daaf287efe2 https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a -https://repo.anaconda.com/pkgs/main/osx-64/tomli-2.0.1-py39hecd8cb5_0.conda#49318006e63c8628ce0a1e2e1433d30d -https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.2-py39hca72f7f_0.conda#2653da9c248d53e811364e65353c8742 -https://repo.anaconda.com/pkgs/main/osx-64/cffi-1.15.1-py39hc55c11b_0.conda#965f34484f6602adfcbe8418c2a16e17 -https://repo.anaconda.com/pkgs/main/noarch/fonttools-4.25.0-pyhd3eb1b0_0.conda#bb9c5b5a6d892fca5efe4bf0203b6a48 -https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py39h9ed2024_0.conda#68ed4da109042256b78f9c46537bd2a3 -https://repo.anaconda.com/pkgs/main/noarch/packaging-21.3-pyhd3eb1b0_0.conda#07bbfbb961db7fa329cc42716943ea62 -https://repo.anaconda.com/pkgs/main/osx-64/pillow-9.2.0-py39hde71d04_1.conda#ecd1fdbc77659c3bf4c056e0f8e703c7 -https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 -https://repo.anaconda.com/pkgs/main/osx-64/setuptools-65.5.0-py39hecd8cb5_0.conda#d7a09d5402d510409064000d25b7d436 -https://repo.anaconda.com/pkgs/main/osx-64/brotlipy-0.7.0-py39h9ed2024_1003.conda#a08f6f5f899aff4a07351217b36fae41 -https://repo.anaconda.com/pkgs/main/osx-64/cryptography-38.0.1-py39hf6deb26_0.conda#62e4840cdfb6d8b7656a30ece5e1ea1d -https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.22.3-py39h3b1a694_0.conda#f68019d1d839b40739b64b6feae2b436 -https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.1.2-py39hecd8cb5_0.conda#8239bdb679b675ab8aac1bdc0756d383 -https://repo.anaconda.com/pkgs/main/noarch/pyopenssl-22.0.0-pyhd3eb1b0_0.conda#1dbbf9422269cd62c7094960d9b43f36 -https://repo.anaconda.com/pkgs/main/noarch/pytest-cov-3.0.0-pyhd3eb1b0_0.conda#bbdaac2947f507399816d509107945c2 -https://repo.anaconda.com/pkgs/main/noarch/pytest-forked-1.3.0-pyhd3eb1b0_0.tar.bz2#07970bffdc78f417d7f8f1c7e620f5c4 -https://repo.anaconda.com/pkgs/main/noarch/pytest-xdist-2.5.0-pyhd3eb1b0_0.conda#d15cdc4207bcf8ca920822597f1d138d -https://repo.anaconda.com/pkgs/main/osx-64/urllib3-1.26.12-py39hecd8cb5_0.conda#49f78830138d7e4b24a35b289b4bf62f -https://repo.anaconda.com/pkgs/main/osx-64/requests-2.28.1-py39hecd8cb5_0.conda#c2a59bb72db0abd039ce447be18c139d -https://repo.anaconda.com/pkgs/main/noarch/codecov-2.1.11-pyhd3eb1b0_0.conda#83a743cc928162d53d4066c43468b2c7 -https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.3.5-py39h67323c0_0.conda#312133560b81ec1a2aaf95835e90b5e9 -https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.5.3-py39hecd8cb5_0.conda#25cf9d021c49d6ebb931743a702ad666 -https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.5.3-py39hfb0c5b7_0.conda#a62605b72e89b204a0944b67b4cf5554 -https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.1-py39h4ab4a9b_0.conda#f947c9a1c65da729963b3035c219ba10 -https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.2-py39hb2f4e1b_0.conda#1bc33de45069ad534182ca92e616ec7e -https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.22.3-py39h2e5f0a9_0.conda#16892a18dae1fb1522845e4b6005b436 -https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.4-py39he696674_0.conda#9776eb34625bf969ba017f7362ecf23f 
-https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.9.3-py39h3d31255_0.conda#c2917042394d646f4a2ca22e0b665a06 -https://repo.anaconda.com/pkgs/main/osx-64/pandas-1.5.1-py39h07fba90_0.conda#d1137f8d61981eed108f5fe0452d0848 -https://repo.anaconda.com/pkgs/main/osx-64/pyamg-4.2.3-py39hc29d2bd_0.conda#728a52ac4cc423a4895158c08b95bedf +https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.3.3-py312h6c40b1e_0.conda#49173b5a36c9134865221f29d4a73fb6 +https://repo.anaconda.com/pkgs/main/osx-64/unicodedata2-15.1.0-py312h6c40b1e_0.conda#65bd2cb787fc99662d9bb6e6520c5826 +https://repo.anaconda.com/pkgs/main/osx-64/wheel-0.43.0-py312hecd8cb5_0.conda#c0bdd5748b170523232e8ad1d667136c +https://repo.anaconda.com/pkgs/main/osx-64/fonttools-4.51.0-py312h6c40b1e_0.conda#8f55fa86b73e8a7f4403503f9b7a9959 +https://repo.anaconda.com/pkgs/main/osx-64/meson-1.3.1-py312hecd8cb5_0.conda#43963a2b38becce4caa95434b8c96837 +https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.26.4-py312h6f81483_0.conda#87f73efbf26ab2e2ea7c32481a71bd47 +https://repo.anaconda.com/pkgs/main/osx-64/pillow-10.3.0-py312h6c40b1e_0.conda#fe883fa4247d35fe6de49f713529ca02 +https://repo.anaconda.com/pkgs/main/osx-64/pip-24.0-py312hecd8cb5_0.conda#7a8e0b1d3742ddf1c8aa97fbaa158039 +https://repo.anaconda.com/pkgs/main/osx-64/pyproject-metadata-0.7.1-py312hecd8cb5_0.conda#e91ce37477d24dcdf7e0a8b93c5e72fd +https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.4.0-py312hecd8cb5_0.conda#b816a2439ba9b87524aec74d58e55b0a +https://repo.anaconda.com/pkgs/main/osx-64/python-dateutil-2.9.0post0-py312hecd8cb5_0.conda#b3ed54eb118325785284dd18bfceca19 +https://repo.anaconda.com/pkgs/main/osx-64/meson-python-0.15.0-py312h6c40b1e_0.conda#688ab56b9d8e5a2e3f018ca3ce34e061 +https://repo.anaconda.com/pkgs/main/osx-64/pytest-cov-4.1.0-py312hecd8cb5_1.conda#a33a24eb20359f464938e75b2f57e23a +https://repo.anaconda.com/pkgs/main/osx-64/pytest-xdist-3.5.0-py312hecd8cb5_0.conda#d1ecfb3691cceecb1f16bcfdf0b67bb5 +https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.3.7-py312h32608ca_0.conda#f96a01eba5ea542cf9c7cc8d77447627 +https://repo.anaconda.com/pkgs/main/osx-64/contourpy-1.2.0-py312ha357a0b_0.conda#57d384ad07152375b40a6293f79e3f0c +https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.8.4-py312hecd8cb5_0.conda#6886c230c2ec2f47621b5cca4c7d493a +https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.8.4-py312h7f12edd_0.conda#a4eee14a4dcaa89b306ca33d2d479fa4 +https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.8-py312h6c40b1e_0.conda#d59d01b940493f2b6a84aac922fd0c76 +https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.4-py312ha357a0b_0.conda#c1ea9c8eee79a5af3399f3c31be0e9c6 +https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.26.4-py312hac873b0_0.conda#3150bac1e382156f82a153229e1ebd06 +https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.7-py312hac873b0_0.conda#6303ba071636ef57fddf69eb6f440ec1 +https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.11.4-py312h81688c2_0.conda#7d57b4c21a9261f97fa511e0940c5d93 +https://repo.anaconda.com/pkgs/main/osx-64/pandas-2.2.1-py312he282a81_0.conda#021b70a1e40efb75b89eb8ebdb347132 +https://repo.anaconda.com/pkgs/main/osx-64/pyamg-4.2.3-py312h44cbcf4_0.conda#3bdc7be74087b3a5a83c520a74e1e8eb +# pip cython @ https://files.pythonhosted.org/packages/d5/6d/06c08d75adb98cdf72af18801e193d22580cc86ca553610f430f18ea26b3/Cython-3.0.10-cp312-cp312-macosx_10_9_x86_64.whl#sha256=8f2864ab5fcd27a346f0b50f901ebeb8f60b25a60a575ccfd982e7f3e9674914 +# pip threadpoolctl @ 
https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml b/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml index 8127d5af88b18..0f82886f4acb2 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml +++ b/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml @@ -16,10 +16,11 @@ dependencies: - matplotlib - pandas - pyamg - - pytest - - pytest-xdist==2.5.0 + - pytest<8 + - pytest-xdist - pillow - - codecov + - ninja + - meson-python - pytest-cov - coverage - sphinx diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock index 68a5541f9f88c..46fd0d308eaa2 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock @@ -1,88 +1,88 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: f66cd382e1555318ed0b7498301d0e9dbe2b1d509ca7c7e13c7db959069cec83 +# input_hash: d4063b0b99f7a39e30c5f6e2d9c5dd293d9b206ce326841bf811534ea1be79f0 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2022.10.11-h06a4308_0.conda#e9b86b388e2cf59585fefca34037b783 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7 https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2022f-h04d1e81_0.conda#02f786cfa9e5c45d8439799445287030 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_2.conda#88a54b8f50e351c650e16f4ee781440c -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.3-h5eee18b_3.conda#0c616f387885c1bbb57ec0bd1e779ced -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1s-h7f8727e_0.conda#25f9c4e2394976be98d01cccef2ce43a -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.6-h5eee18b_0.conda#8abc704d4a473839d5351b43deb793bb -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_1.conda#d1d1fc47640fe0d9f7fa64c0a054bfd8 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25 
https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.40.0-h5082296_0.conda#d1300b056e728ea61a0bf135b035e60d -https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.15-haa1d7c7_0.conda#dacae2189e4ec6083804b07b44f1a342 -https://repo.anaconda.com/pkgs/main/linux-64/certifi-2022.9.24-py39h06a4308_0.conda#1e3ca01764ce78e609ab61b8067734eb -https://repo.anaconda.com/pkgs/main/noarch/wheel-0.37.1-pyhd3eb1b0_0.conda#ab85e96e26da8d5797c2458232338b86 -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-65.5.0-py39h06a4308_0.conda#3af37a56c2d135aff97e1e76120e3539 -https://repo.anaconda.com/pkgs/main/linux-64/pip-22.2.2-py39h06a4308_0.conda#cb97bf53e76d609bf93b2e9dd04799d8 -# pip alabaster @ https://files.pythonhosted.org/packages/10/ad/00b090d23a222943eb0eda509720a404f531a439e803f6538f35136cae9e/alabaster-0.7.12-py2.py3-none-any.whl#sha256=446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359 -# pip attrs @ https://files.pythonhosted.org/packages/f2/bc/d817287d1aa01878af07c19505fafd1165cd6a119e9d0821ca1d1c20312d/attrs-22.1.0-py2.py3-none-any.whl#sha256=86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c -# pip charset-normalizer @ https://files.pythonhosted.org/packages/db/51/a507c856293ab05cdc1db77ff4bc1268ddd39f29e7dc4919aa497f0adbec/charset_normalizer-2.1.1-py3-none-any.whl#sha256=83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f -# pip cycler @ https://files.pythonhosted.org/packages/5c/f9/695d6bedebd747e5eb0fe8fad57b72fdf25411273a39791cde838d5a8f51/cycler-0.11.0-py3-none-any.whl#sha256=3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3 -# pip cython @ https://files.pythonhosted.org/packages/c3/8f/bb0a7182dc081fbc6608e98a8184970e7d903acfc1ec58680d46f5c915ce/Cython-0.29.32-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl#sha256=f3fd44cc362eee8ae569025f070d56208908916794b6ab21e139cea56470a2b3 -# pip docutils @ https://files.pythonhosted.org/packages/93/69/e391bd51bc08ed9141ecd899a0ddb61ab6465309f1eb470905c0c8868081/docutils-0.19-py3-none-any.whl#sha256=5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc -# pip exceptiongroup @ https://files.pythonhosted.org/packages/ce/2e/9a327cc0d2d674ee2d570ee30119755af772094edba86d721dda94404d1a/exceptiongroup-1.0.4-py3-none-any.whl#sha256=542adf9dea4055530d6e1279602fa5cb11dab2395fa650b8674eaec35fc4a828 -# pip execnet @ https://files.pythonhosted.org/packages/81/c0/3072ecc23f4c5e0a1af35e3a222855cfd9c80a1a105ca67be3b6172637dd/execnet-1.9.0-py2.py3-none-any.whl#sha256=a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142 -# pip fonttools @ https://files.pythonhosted.org/packages/e3/d9/e9bae85e84737e76ebbcbea13607236da0c0699baed0ae4f1151b728a608/fonttools-4.38.0-py3-none-any.whl#sha256=820466f43c8be8c3009aef8b87e785014133508f0de64ec469e4efb643ae54fb -# pip idna @ https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl#sha256=90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597 
+https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e +https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.19-h955ad1f_1.conda#4b453281859c293c9d577271f3b18a0d +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-69.5.1-py39h06a4308_0.conda#3eb144d481b39c0fbbced789dd9b76b3 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py39h06a4308_0.conda#40bb60408c7433d767fd8c65b35bc4a0 +https://repo.anaconda.com/pkgs/main/linux-64/pip-24.0-py39h06a4308_0.conda#7f8ce3af15cfecd12e4dda8c5cef5fb7 +# pip alabaster @ https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl#sha256=b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92 +# pip babel @ https://files.pythonhosted.org/packages/27/45/377f7e32a5c93d94cd56542349b34efab5ca3f9e2fd5a68c5e93169aa32d/Babel-2.15.0-py3-none-any.whl#sha256=08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb +# pip certifi @ https://files.pythonhosted.org/packages/ba/06/a07f096c664aeb9f01624f858c3add0a4e913d6c96257acb4fce61e7de14/certifi-2024.2.2-py3-none-any.whl#sha256=dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1 +# pip charset-normalizer @ https://files.pythonhosted.org/packages/98/69/5d8751b4b670d623aa7a47bef061d69c279e9f922f6705147983aa76c3ce/charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796 +# pip cycler @ https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl#sha256=85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30 +# pip cython @ https://files.pythonhosted.org/packages/a7/f5/3dde4d96076888ceaa981827b098274c2b45ddd4b20d75a8cfaa92b91eec/Cython-3.0.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=651a15a8534ebfb9b58cb0b87c269c70984b6f9c88bfe65e4f635f0e3f07dfcd +# pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 +# pip exceptiongroup @ https://files.pythonhosted.org/packages/01/90/79fe92dd413a9cab314ef5c591b5aa9b9ba787ae4cadab75055b0ae00b33/exceptiongroup-1.2.1-py3-none-any.whl#sha256=5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad +# pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc +# pip fonttools @ https://files.pythonhosted.org/packages/8b/c6/636f008104908a93b80419f756be755bb91df4b8a0c88d5158bb52c82c3a/fonttools-4.51.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=0d145976194a5242fdd22df18a1b451481a88071feadf251221af110ca8f00ce +# pip idna @ https://files.pythonhosted.org/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl#sha256=82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b -# pip iniconfig @ 
https://files.pythonhosted.org/packages/9b/dd/b3c12c6d707058fa947864b67f0c4e0c39ef8610988d7baea9578f3c48f3/iniconfig-1.1.1-py2.py3-none-any.whl#sha256=011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 -# pip joblib @ https://files.pythonhosted.org/packages/91/d4/3b4c8e5a30604df4c7518c562d4bf0502f2fa29221459226e140cf846512/joblib-1.2.0-py3-none-any.whl#sha256=091138ed78f800342968c523bdde947e7a305b8594b910a0fea2ab83c3c6d385 -# pip kiwisolver @ https://files.pythonhosted.org/packages/a4/36/c414d75be311ce97ef7248edcc4fc05afae2998641bf6b592d43a9dee581/kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f -# pip markupsafe @ https://files.pythonhosted.org/packages/df/06/c515c5bc43b90462e753bc768e6798193c6520c9c7eb2054c7466779a9db/MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77 -# pip networkx @ https://files.pythonhosted.org/packages/42/31/d2f89f1ae42718f8c8a9e440ebe38d7d5fe1e0d9eb9178ce779e365b3ab0/networkx-2.8.8-py3-none-any.whl#sha256=e435dfa75b1d7195c7b8378c3859f0445cd88c6b0375c181ed66823a9ceb7524 -# pip numpy @ https://files.pythonhosted.org/packages/4c/b9/038abd6fbd67b05b03cb1af590cfc02b7f1e5a37af7ac6a868f5093c29f5/numpy-1.23.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=33161613d2269025873025b33e879825ec7b1d831317e68f4f2f0f84ed14c719 -# pip pillow @ https://files.pythonhosted.org/packages/2f/73/ec6b3e3f6b311cf1468eafc92a890f690a2cacac0cfd0f1bcc2b891d1334/Pillow-9.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=af0372acb5d3598f36ec0914deed2a63f6bcdb7b606da04dc19a88d31bf0c05b -# pip pluggy @ https://files.pythonhosted.org/packages/9e/01/f38e2ff29715251cf25532b9082a1589ab7e4f571ced434f98d0139336dc/pluggy-1.0.0-py2.py3-none-any.whl#sha256=74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 -# pip py @ https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl#sha256=607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 -# pip pygments @ https://files.pythonhosted.org/packages/4f/82/672cd382e5b39ab1cd422a672382f08a1fb3d08d9e0c0f3707f33a52063b/Pygments-2.13.0-py3-none-any.whl#sha256=f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42 -# pip pyparsing @ https://files.pythonhosted.org/packages/6c/10/a7d0fa5baea8fe7b50f448ab742f26f52b80bfca85ac2be9d35cdd9a3246/pyparsing-3.0.9-py3-none-any.whl#sha256=5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc -# pip pytz @ https://files.pythonhosted.org/packages/85/ac/92f998fc52a70afd7f6b788142632afb27cd60c8c782d1452b7466603332/pytz-2022.6-py2.py3-none-any.whl#sha256=222439474e9c98fced559f1709d89e6c9cbf8d79c794ff3eb9f8800064291427 +# pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 +# pip joblib @ https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl#sha256=06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 +# pip kiwisolver @ 
https://files.pythonhosted.org/packages/c0/a8/841594f11d0b88d8aeb26991bc4dac38baa909dc58d0c4262a4f7893bcbf/kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff +# pip markupsafe @ https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 +# pip meson @ https://files.pythonhosted.org/packages/33/75/b1a37fa7b2dbca8c0dbb04d5cdd7e2720c8ef6febe41b4a74866350e041c/meson-1.4.0-py3-none-any.whl#sha256=476a458d51fcfa322a6bdc64da5138997c542d08e6b2e49b9fa68c46fd7c4475 +# pip networkx @ https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl#sha256=f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2 +# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b +# pip numpy @ https://files.pythonhosted.org/packages/54/30/c2a907b9443cf42b90c17ad10c1e8fa801975f01cb9764f3f8eb8aea638b/numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 +# pip packaging @ https://files.pythonhosted.org/packages/49/df/1fceb2f8900f8639e278b056416d49134fb8d84c5942ffaa01ad34782422/packaging-24.0-py3-none-any.whl#sha256=2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 +# pip pillow @ https://files.pythonhosted.org/packages/f5/6d/52e82352670e850f468de9e6bccced4202a09f58e7ea5ecdbf08283d85cb/pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl#sha256=1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8 +# pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 +# pip pygments @ https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl#sha256=b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a +# pip pyparsing @ https://files.pythonhosted.org/packages/9d/ea/6d76df31432a0e6fdf81681a895f009a4bb47b3c39036db3e1b528191d52/pyparsing-3.1.2-py3-none-any.whl#sha256=f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742 +# pip pytz @ https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl#sha256=328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319 # pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a -# pip sphinxcontrib-applehelp @ 
https://files.pythonhosted.org/packages/dc/47/86022665a9433d89a66f5911b558ddff69861766807ba685de2e324bd6ed/sphinxcontrib_applehelp-1.0.2-py2.py3-none-any.whl#sha256=806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a -# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl#sha256=8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e -# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/63/40/c854ef09500e25f6432dcbad0f37df87fd7046d376272292d8654cc71c95/sphinxcontrib_htmlhelp-2.0.0-py2.py3-none-any.whl#sha256=d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07 +# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/56/89/fea3fbf6785b388e6cb8a1beaf62f96e80b37311bdeed6e133388a732426/sphinxcontrib_applehelp-1.0.8-py3-none-any.whl#sha256=cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4 +# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/a0/52/1049d918d1d1c72857d285c3f0c64c1cbe0be394ce1c93a3d2aa4f39fe3b/sphinxcontrib_devhelp-1.0.6-py3-none-any.whl#sha256=6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f +# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/c2/e9/74c4cda5b409af3222fda38f0774e616011bc935f639dbc0da5ca2d1be7d/sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl#sha256=393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04 # pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 -# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl#sha256=bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6 -# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl#sha256=352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd -# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b +# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/80/b3/1beac14a88654d2e5120d0143b49be5ad450b86eb1963523d8dbdcc51eb2/sphinxcontrib_qthelp-1.0.7-py3-none-any.whl#sha256=e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182 +# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/38/24/228bb903ea87b9e08ab33470e6102402a644127108c7117ac9c00d849f82/sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl#sha256=326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7 +# pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f +# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 # pip tomli @ 
https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl#sha256=939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc -# pip typing-extensions @ https://files.pythonhosted.org/packages/0b/8e/f1a0a5a76cfef77e1eb6004cb49e5f8d72634da638420b9ea492ce8305e8/typing_extensions-4.4.0-py3-none-any.whl#sha256=16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e -# pip urllib3 @ https://files.pythonhosted.org/packages/6f/de/5be2e3eed8426f871b170663333a0f627fc2924cc386cd41be065e7ea870/urllib3-1.26.12-py2.py3-none-any.whl#sha256=b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997 -# pip zipp @ https://files.pythonhosted.org/packages/40/8a/d63273ed0fa4a3d06f77e7b043f6577d8894e95515b0c187c52e2c0efabb/zipp-3.10.0-py3-none-any.whl#sha256=4fcb6f278987a6605757302a6e40e896257570d11c51628968ccb2a47e80c6c1 -# pip babel @ https://files.pythonhosted.org/packages/92/f7/86301a69926e11cd52f73396d169554d09b20b1723a040c2dcc1559ef588/Babel-2.11.0-py3-none-any.whl#sha256=1ad3eca1c885218f6dce2ab67291178944f810a10a9b5f3cb8382a5a232b64fe -# pip contourpy @ https://files.pythonhosted.org/packages/2f/b2/3787a2993307d8305d693594b2e0f3a0fc95b4e064ad4582324487fc848a/contourpy-1.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=1dedf4c64185a216c35eb488e6f433297c660321275734401760dafaeb0ad5c2 -# pip coverage @ https://files.pythonhosted.org/packages/6b/f2/919f0fdc93d3991ca074894402074d847be8ac1e1d78e7e9e1c371b69a6f/coverage-6.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8f830ed581b45b82451a40faabb89c84e1a998124ee4212d440e9c6cf70083e5 -# pip imageio @ https://files.pythonhosted.org/packages/33/4d/d31ab40bb761fb381c7514e6070c6e1643c44f83a2a48a83e4066227737f/imageio-2.22.4-py3-none-any.whl#sha256=bb173f8af27e4921f59539c4d45068fcedb892e58261fce8253f31c9a0ff9ccf -# pip importlib-metadata @ https://files.pythonhosted.org/packages/b5/64/ef29a63cf08f047bb7fb22ab0f1f774b87eed0bb46d067a5a524798a4af8/importlib_metadata-5.0.0-py3-none-any.whl#sha256=ddb0e35065e8938f867ed4928d0ae5bf2a53b7773871bfe6bcc7e4fcdc7dea43 -# pip jinja2 @ https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 -# pip packaging @ https://files.pythonhosted.org/packages/05/8e/8de486cbd03baba4deef4142bd643a3e7bbe954a784dc1bb17142572d127/packaging-21.3-py3-none-any.whl#sha256=ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 -# pip python-dateutil @ https://files.pythonhosted.org/packages/36/7a/87837f39d0296e723bb9b62bbb257d0355c7f6128853c78955f57342a56d/python_dateutil-2.8.2-py2.py3-none-any.whl#sha256=961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 -# pip pywavelets @ https://files.pythonhosted.org/packages/5a/98/4549479a32972bdfdd5e75e168219e97f4dfaee535a8308efef7291e8398/PyWavelets-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=71ab30f51ee4470741bb55fc6b197b4a2b612232e30f6ac069106f0156342356 -# pip requests @ https://files.pythonhosted.org/packages/ca/91/6d9b8ccacd0412c08820f72cebaa4f0c0441b5cda699c90f618b6f8a1b42/requests-2.28.1-py3-none-any.whl#sha256=8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 -# pip scipy @ 
https://files.pythonhosted.org/packages/bb/b7/380c9e4cd71263f03d16f8a92c0e44c9bdef38777e1a7dde1f47ba996bac/scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0 -# pip tifffile @ https://files.pythonhosted.org/packages/d2/cb/1ecf9f39113a7ad0529a0441a16982791e7b37a4efdba2f89a687fdf15c9/tifffile-2022.10.10-py3-none-any.whl#sha256=87f3aee8a0d06b74655269a105de75c1958a24653e1930d523eb516100043503 -# pip codecov @ https://files.pythonhosted.org/packages/dc/e2/964d0881eff5a67bf5ddaea79a13c7b34a74bc4efe917b368830b475a0b9/codecov-2.1.12-py2.py3-none-any.whl#sha256=585dc217dc3d8185198ceb402f85d5cb5dbfa0c5f350a5abcdf9e347776a5b47 -# pip pandas @ https://files.pythonhosted.org/packages/5e/ed/5c9cdaa5d48c7194bef4335eab3cdc2f8afa868a5546027e018ea9deb4c3/pandas-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=344021ed3e639e017b452aa8f5f6bf38a8806f5852e217a7594417fb9bbfa00e -# pip pyamg @ https://files.pythonhosted.org/packages/8e/08/d512b6e34d502152723b5a4ad9d962a6141dfe83cd8bcd01af4cb6e84f28/pyamg-4.2.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=18af99d2551df07951c35cf270dc76703f8c5d30b16ea8e61657fda098f57dd7 -# pip pytest @ https://files.pythonhosted.org/packages/67/68/a5eb36c3a8540594b6035e6cdae40c1ef1b6a2bfacbecc3d1a544583c078/pytest-7.2.0-py3-none-any.whl#sha256=892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71 -# pip scikit-image @ https://files.pythonhosted.org/packages/0f/29/d157cd648b87212e498189c183a32f0f48e24fe22e9673dacd97594f39fa/scikit_image-0.19.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=ff3b1025356508d41f4fe48528e509d95f9e4015e90cf158cd58c56dc63e0ac5 -# pip scikit-learn @ https://files.pythonhosted.org/packages/fa/74/78f4c6ae97ccd9cd9bac5ac8999af7c1f21a438edca5c5b381394568831e/scikit_learn-1.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f5d4231af7199531e77da1b78a4cc6b3d960a00b1ec672578ac818aae2b9c35d -# pip setuptools-scm @ https://files.pythonhosted.org/packages/01/ed/75a20e7b075e8ecb1f84e8debf833917905d8790b78008915bd68dddd5c4/setuptools_scm-7.0.5-py3-none-any.whl#sha256=7930f720905e03ccd1e1d821db521bff7ec2ac9cf0ceb6552dd73d24a45d3b02 -# pip sphinx @ https://files.pythonhosted.org/packages/67/a7/01dd6fd9653c056258d65032aa09a615b5d7b07dd840845a9f41a8860fbc/sphinx-5.3.0-py3-none-any.whl#sha256=060ca5c9f7ba57a08a1219e547b269fadf125ae25b06b9fa7f66768efb652d6d -# pip lightgbm @ https://files.pythonhosted.org/packages/19/b7/a880bb0922df5413909d1d6d7831b1e93622f113c7889f58a775a9c79ce4/lightgbm-3.3.3-py3-none-manylinux1_x86_64.whl#sha256=389edda68b7f24a1755a6af4dad06e16236e374e9de64253a105b12982b153e2 -# pip matplotlib @ https://files.pythonhosted.org/packages/d8/c0/96da5f5532ac500860a52f87a933cdea66436f1c436a76e80015ee2409c4/matplotlib-3.6.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=795ad83940732b45d39b82571f87af0081c120feff2b12e748d96bb191169e33 -# pip numpydoc @ https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 -# pip pytest-cov @ https://files.pythonhosted.org/packages/fe/1f/9ec0ddd33bd2b37d6ec50bb39155bca4fe7085fa78b3b434c05459a860e3/pytest_cov-4.0.0-py3-none-any.whl#sha256=2feb1b751d66a8bd934e5edfa2e961d11309dc37b73b0eabe73b5945fee20f6b -# pip pytest-forked @ 
https://files.pythonhosted.org/packages/0c/36/c56ef2aea73912190cdbcc39aaa860db8c07c1a5ce8566994ec9425453db/pytest_forked-1.4.0-py3-none-any.whl#sha256=bbbb6717efc886b9d64537b41fb1497cfaf3c9601276be8da2cccfea5a3c8ad8 -# pip pytest-xdist @ https://files.pythonhosted.org/packages/21/08/b1945d4b4986eb1aa10cf84efc5293bba39da80a2f95db3573dd90678408/pytest_xdist-2.5.0-py3-none-any.whl#sha256=6fe5c74fec98906deb8f2d2b616b5c782022744978e7bd4695d39c8f42d0ce65 +# pip tzdata @ https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl#sha256=9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252 +# pip urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.whl#sha256=450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d +# pip zipp @ https://files.pythonhosted.org/packages/c2/0a/ba9d0ee9536d3ef73a3448e931776e658b36f128d344e175bc32b092a8bf/zipp-3.18.1-py3-none-any.whl#sha256=206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b +# pip contourpy @ https://files.pythonhosted.org/packages/31/a2/2f12e3a6e45935ff694654b710961b03310b0e1ec997ee9f416d3c873f87/contourpy-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e1d59258c3c67c865435d8fbeb35f8c59b8bef3d6f46c1f29f6123556af28445 +# pip coverage @ https://files.pythonhosted.org/packages/c1/50/b7d6f236c20334b0378ed88078e830640a64ad8eb9f11f818b2af34d00c0/coverage-7.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=d21918e9ef11edf36764b93101e2ae8cc82aa5efdc7c5a4e9c6c35a48496d601 +# pip imageio @ https://files.pythonhosted.org/packages/a3/b6/39c7dad203d9984225f47e0aa39ac3ba3a47c77a02d0ef2a7be691855a06/imageio-2.34.1-py3-none-any.whl#sha256=408c1d4d62f72c9e8347e7d1ca9bc11d8673328af3913868db3b828e28b40a4c +# pip importlib-metadata @ https://files.pythonhosted.org/packages/2d/0a/679461c511447ffaf176567d5c496d1de27cbe34a87df6677d7171b2fbd4/importlib_metadata-7.1.0-py3-none-any.whl#sha256=30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570 +# pip importlib-resources @ https://files.pythonhosted.org/packages/75/06/4df55e1b7b112d183f65db9503bff189e97179b256e1ea450a3c365241e0/importlib_resources-6.4.0-py3-none-any.whl#sha256=50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c +# pip jinja2 @ https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl#sha256=bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +# pip lazy-loader @ https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl#sha256=342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc +# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526 +# pip pytest @ https://files.pythonhosted.org/packages/51/ff/f6e8b8f39e08547faece4bd80f89d5a8de68a38b2d179cc1c4490ffa3286/pytest-7.4.4-py3-none-any.whl#sha256=b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8 +# pip python-dateutil @ 
https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 +# pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f +# pip scipy @ https://files.pythonhosted.org/packages/c6/ba/a778e6c0020d728c119b0379805a357135fe8c9bc87fdb7e0750ca11319f/scipy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=28e286bf9ac422d6beb559bc61312c348ca9b0f0dae0d7c5afde7f722d6ea13d +# pip tifffile @ https://files.pythonhosted.org/packages/c1/79/29d0fa40017f7b749ce344759dcc21e2ec9bbb81fc69ca2ce06e261f83f0/tifffile-2024.5.10-py3-none-any.whl#sha256=4154f091aa24d4e75bfad9ab2d5424a68c70e67b8220188066dc61946d4551bd +# pip lightgbm @ https://files.pythonhosted.org/packages/ba/11/cb8b67f3cbdca05b59a032bb57963d4fe8c8d18c3870f30bed005b7f174d/lightgbm-4.3.0-py3-none-manylinux_2_28_x86_64.whl#sha256=104496a3404cb2452d3412cbddcfbfadbef9c372ea91e3a9b8794bcc5183bf07 +# pip matplotlib @ https://files.pythonhosted.org/packages/5e/2c/513395a63a9e1124a5648addbf73be23cc603f955af026b04416da98dc96/matplotlib-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=606e3b90897554c989b1e38a258c626d46c873523de432b1462f295db13de6f9 +# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8 +# pip pandas @ https://files.pythonhosted.org/packages/bb/30/f6f1f1ac36250f50c421b1b6af08c35e5a8b5a84385ef928625336b93e6f/pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921 +# pip pyamg @ https://files.pythonhosted.org/packages/68/a9/aed9f557e7eb779d2cb4fa090663f8540979e0c04dadd16e9a0bdc9632c5/pyamg-5.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5817d4567fb240dab4779bb1630bbb3035b3827731fcdaeb9ecc9c8814319995 +# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 +# pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7 +# pip scikit-image @ https://files.pythonhosted.org/packages/a3/7e/4cd853a855ac34b4ef3ef6a5c3d1c2e96eaca1154fc6be75db55ffa87393/scikit_image-0.22.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=3b7a6c89e8d6252332121b58f50e1625c35f7d6a85489c0b6b7ee4f5155d547a +# pip sphinx @ https://files.pythonhosted.org/packages/b4/fa/130c32ed94cf270e3d0b9ded16fb7b2c8fea86fa7263c29a696a30c1dde7/sphinx-7.3.7-py3-none-any.whl#sha256=413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3 +# pip numpydoc @ https://files.pythonhosted.org/packages/f0/fa/dcfe0f65660661db757ee9ebd84e170ff98edd5d80235f62457d9088f85f/numpydoc-1.7.0-py3-none-any.whl#sha256=5a56419d931310d79a06cfc2a126d1558700feeb9b4f3d8dcae1a8134be829c9 diff --git a/build_tools/azure/pylatest_pip_scipy_dev_environment.yml 
b/build_tools/azure/pylatest_pip_scipy_dev_environment.yml index 31eb7117d21a2..7d8e7a66d987e 100644 --- a/build_tools/azure/pylatest_pip_scipy_dev_environment.yml +++ b/build_tools/azure/pylatest_pip_scipy_dev_environment.yml @@ -9,9 +9,11 @@ dependencies: - pip - pip: - threadpoolctl - - pytest - - pytest-xdist==2.5.0 - - codecov + - pytest<8 + - pytest-xdist + - pip + - ninja + - meson-python - pytest-cov - coverage - pooch diff --git a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock index 462b0360dc4b6..e4305c97b76bc 100644 --- a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock @@ -1,69 +1,67 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: f0170b6948e8a0368478b41b017d43e0009cabf81b15556aa9433c9359c3f52c +# input_hash: 777413179f12c3f7972520657eb2c826ffd6ff4c15e5da73631696b7ef07c3f2 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2022.10.11-h06a4308_0.conda#e9b86b388e2cf59585fefca34037b783 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7 https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2022f-h04d1e81_0.conda#02f786cfa9e5c45d8439799445287030 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h7b6447c_0.conda#9303f4af7c004e069bae22bde8d800ee -https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_2.conda#88a54b8f50e351c650e16f4ee781440c +https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297 +https://repo.anaconda.com/pkgs/main/linux-64/expat-2.6.2-h6a678d5_0.conda#55049db2772dae035f6b8a95f72b5970 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0 https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 -https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.3-h5eee18b_3.conda#0c616f387885c1bbb57ec0bd1e779ced -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1s-h7f8727e_0.conda#25f9c4e2394976be98d01cccef2ce43a -https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.6-h5eee18b_0.conda#8abc704d4a473839d5351b43deb793bb -https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_1.conda#d1d1fc47640fe0d9f7fa64c0a054bfd8 +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed 
+https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb -https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 -https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.40.0-h5082296_0.conda#d1300b056e728ea61a0bf135b035e60d -https://repo.anaconda.com/pkgs/main/linux-64/python-3.10.8-haa1d7c7_0.conda#f94e0ff0addc80d8746e04c6d9367012 -https://repo.anaconda.com/pkgs/main/linux-64/certifi-2022.9.24-py310h06a4308_0.conda#20f896f4142bbcf3f4e932082c40ee43 -https://repo.anaconda.com/pkgs/main/noarch/wheel-0.37.1-pyhd3eb1b0_0.conda#ab85e96e26da8d5797c2458232338b86 -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-65.5.0-py310h06a4308_0.conda#776ce9588114e5a9e2b7298bd538c231 -https://repo.anaconda.com/pkgs/main/linux-64/pip-22.2.2-py310h06a4308_0.conda#b446157ab55432767f85b69b135dc452 -# pip alabaster @ https://files.pythonhosted.org/packages/10/ad/00b090d23a222943eb0eda509720a404f531a439e803f6538f35136cae9e/alabaster-0.7.12-py2.py3-none-any.whl#sha256=446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359 -# pip appdirs @ https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl#sha256=a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128 -# pip attrs @ https://files.pythonhosted.org/packages/f2/bc/d817287d1aa01878af07c19505fafd1165cd6a119e9d0821ca1d1c20312d/attrs-22.1.0-py2.py3-none-any.whl#sha256=86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c -# pip charset-normalizer @ https://files.pythonhosted.org/packages/db/51/a507c856293ab05cdc1db77ff4bc1268ddd39f29e7dc4919aa497f0adbec/charset_normalizer-2.1.1-py3-none-any.whl#sha256=83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f -# pip docutils @ https://files.pythonhosted.org/packages/93/69/e391bd51bc08ed9141ecd899a0ddb61ab6465309f1eb470905c0c8868081/docutils-0.19-py3-none-any.whl#sha256=5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc -# pip exceptiongroup @ https://files.pythonhosted.org/packages/ce/2e/9a327cc0d2d674ee2d570ee30119755af772094edba86d721dda94404d1a/exceptiongroup-1.0.4-py3-none-any.whl#sha256=542adf9dea4055530d6e1279602fa5cb11dab2395fa650b8674eaec35fc4a828 -# pip execnet @ https://files.pythonhosted.org/packages/81/c0/3072ecc23f4c5e0a1af35e3a222855cfd9c80a1a105ca67be3b6172637dd/execnet-1.9.0-py2.py3-none-any.whl#sha256=a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142 -# pip idna @ https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl#sha256=90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e +https://repo.anaconda.com/pkgs/main/linux-64/python-3.12.3-h996f2a0_1.conda#0e22ed7e6df024e4f7467e75c8575301 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-69.5.1-py312h06a4308_0.conda#ce85d9a864a73e0b12d31a97733c9fca +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py312h06a4308_0.conda#18d5f3b68a175c72576876db4afc9e9e 
+https://repo.anaconda.com/pkgs/main/linux-64/pip-24.0-py312h06a4308_0.conda#6d9697bb8b9f3212be10b3b8e01a12b9 +# pip alabaster @ https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl#sha256=b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92 +# pip babel @ https://files.pythonhosted.org/packages/27/45/377f7e32a5c93d94cd56542349b34efab5ca3f9e2fd5a68c5e93169aa32d/Babel-2.15.0-py3-none-any.whl#sha256=08706bdad8d0a3413266ab61bd6c34d0c28d6e1e7badf40a2cebe67644e2e1fb +# pip certifi @ https://files.pythonhosted.org/packages/ba/06/a07f096c664aeb9f01624f858c3add0a4e913d6c96257acb4fce61e7de14/certifi-2024.2.2-py3-none-any.whl#sha256=dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1 +# pip charset-normalizer @ https://files.pythonhosted.org/packages/ee/fb/14d30eb4956408ee3ae09ad34299131fb383c47df355ddb428a7331cfa1e/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b +# pip coverage @ https://files.pythonhosted.org/packages/3f/4f/fcad903698f02ac0d7501432449db12e15fbe5ecfbc01e363eb752c65cbd/coverage-7.5.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8748731ad392d736cc9ccac03c9845b13bb07d020a33423fa5b3a36521ac6e4e +# pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 +# pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc +# pip idna @ https://files.pythonhosted.org/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl#sha256=82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b -# pip iniconfig @ https://files.pythonhosted.org/packages/9b/dd/b3c12c6d707058fa947864b67f0c4e0c39ef8610988d7baea9578f3c48f3/iniconfig-1.1.1-py2.py3-none-any.whl#sha256=011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 -# pip markupsafe @ https://files.pythonhosted.org/packages/9e/82/2e089c6f34e77c073aa5a67040d368aac0dfb9b8ccbb46d381452c26fc33/MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5 -# pip pluggy @ https://files.pythonhosted.org/packages/9e/01/f38e2ff29715251cf25532b9082a1589ab7e4f571ced434f98d0139336dc/pluggy-1.0.0-py2.py3-none-any.whl#sha256=74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 -# pip py @ https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl#sha256=607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 -# pip pygments @ https://files.pythonhosted.org/packages/4f/82/672cd382e5b39ab1cd422a672382f08a1fb3d08d9e0c0f3707f33a52063b/Pygments-2.13.0-py3-none-any.whl#sha256=f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42 -# pip pyparsing @ 
https://files.pythonhosted.org/packages/6c/10/a7d0fa5baea8fe7b50f448ab742f26f52b80bfca85ac2be9d35cdd9a3246/pyparsing-3.0.9-py3-none-any.whl#sha256=5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc -# pip pytz @ https://files.pythonhosted.org/packages/85/ac/92f998fc52a70afd7f6b788142632afb27cd60c8c782d1452b7466603332/pytz-2022.6-py2.py3-none-any.whl#sha256=222439474e9c98fced559f1709d89e6c9cbf8d79c794ff3eb9f8800064291427 +# pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 +# pip markupsafe @ https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 +# pip meson @ https://files.pythonhosted.org/packages/33/75/b1a37fa7b2dbca8c0dbb04d5cdd7e2720c8ef6febe41b4a74866350e041c/meson-1.4.0-py3-none-any.whl#sha256=476a458d51fcfa322a6bdc64da5138997c542d08e6b2e49b9fa68c46fd7c4475 +# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b +# pip packaging @ https://files.pythonhosted.org/packages/49/df/1fceb2f8900f8639e278b056416d49134fb8d84c5942ffaa01ad34782422/packaging-24.0-py3-none-any.whl#sha256=2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 +# pip platformdirs @ https://files.pythonhosted.org/packages/68/13/2aa1f0e1364feb2c9ef45302f387ac0bd81484e9c9a4c5688a322fbdfd08/platformdirs-4.2.2-py3-none-any.whl#sha256=2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee +# pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 +# pip pygments @ https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl#sha256=b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a # pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a -# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/dc/47/86022665a9433d89a66f5911b558ddff69861766807ba685de2e324bd6ed/sphinxcontrib_applehelp-1.0.2-py2.py3-none-any.whl#sha256=806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a -# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl#sha256=8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e -# pip sphinxcontrib-htmlhelp @ 
https://files.pythonhosted.org/packages/63/40/c854ef09500e25f6432dcbad0f37df87fd7046d376272292d8654cc71c95/sphinxcontrib_htmlhelp-2.0.0-py2.py3-none-any.whl#sha256=d412243dfb797ae3ec2b59eca0e52dac12e75a241bf0e4eb861e450d06c6ed07 +# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/56/89/fea3fbf6785b388e6cb8a1beaf62f96e80b37311bdeed6e133388a732426/sphinxcontrib_applehelp-1.0.8-py3-none-any.whl#sha256=cb61eb0ec1b61f349e5cc36b2028e9e7ca765be05e49641c97241274753067b4 +# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/a0/52/1049d918d1d1c72857d285c3f0c64c1cbe0be394ce1c93a3d2aa4f39fe3b/sphinxcontrib_devhelp-1.0.6-py3-none-any.whl#sha256=6485d09629944511c893fa11355bda18b742b83a2b181f9a009f7e500595c90f +# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/c2/e9/74c4cda5b409af3222fda38f0774e616011bc935f639dbc0da5ca2d1be7d/sphinxcontrib_htmlhelp-2.0.5-py3-none-any.whl#sha256=393f04f112b4d2f53d93448d4bce35842f62b307ccdc549ec1585e950bc35e04 # pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 -# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl#sha256=bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6 -# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl#sha256=352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd -# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b -# pip tomli @ https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl#sha256=939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc -# pip urllib3 @ https://files.pythonhosted.org/packages/6f/de/5be2e3eed8426f871b170663333a0f627fc2924cc386cd41be065e7ea870/urllib3-1.26.12-py2.py3-none-any.whl#sha256=b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997 -# pip babel @ https://files.pythonhosted.org/packages/92/f7/86301a69926e11cd52f73396d169554d09b20b1723a040c2dcc1559ef588/Babel-2.11.0-py3-none-any.whl#sha256=1ad3eca1c885218f6dce2ab67291178944f810a10a9b5f3cb8382a5a232b64fe -# pip coverage @ https://files.pythonhosted.org/packages/3c/7d/d5211ea782b193ab8064b06dc0cc042cf1a4ca9c93a530071459172c550f/coverage-6.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=af4fffaffc4067232253715065e30c5a7ec6faac36f8fc8d6f64263b15f74db0 -# pip jinja2 @ https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 -# pip packaging @ https://files.pythonhosted.org/packages/05/8e/8de486cbd03baba4deef4142bd643a3e7bbe954a784dc1bb17142572d127/packaging-21.3-py3-none-any.whl#sha256=ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 -# pip python-dateutil @ 
https://files.pythonhosted.org/packages/36/7a/87837f39d0296e723bb9b62bbb257d0355c7f6128853c78955f57342a56d/python_dateutil-2.8.2-py2.py3-none-any.whl#sha256=961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 -# pip requests @ https://files.pythonhosted.org/packages/ca/91/6d9b8ccacd0412c08820f72cebaa4f0c0441b5cda699c90f618b6f8a1b42/requests-2.28.1-py3-none-any.whl#sha256=8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349 -# pip codecov @ https://files.pythonhosted.org/packages/dc/e2/964d0881eff5a67bf5ddaea79a13c7b34a74bc4efe917b368830b475a0b9/codecov-2.1.12-py2.py3-none-any.whl#sha256=585dc217dc3d8185198ceb402f85d5cb5dbfa0c5f350a5abcdf9e347776a5b47 -# pip pooch @ https://files.pythonhosted.org/packages/8d/64/8e1bfeda3ba0f267b2d9a918e8ca51db8652d0e1a3412a5b3dbce85d90b6/pooch-1.6.0-py3-none-any.whl#sha256=3bf0e20027096836b8dbce0152dbb785a269abeb621618eb4bdd275ff1e23c9c -# pip pytest @ https://files.pythonhosted.org/packages/67/68/a5eb36c3a8540594b6035e6cdae40c1ef1b6a2bfacbecc3d1a544583c078/pytest-7.2.0-py3-none-any.whl#sha256=892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71 -# pip sphinx @ https://files.pythonhosted.org/packages/67/a7/01dd6fd9653c056258d65032aa09a615b5d7b07dd840845a9f41a8860fbc/sphinx-5.3.0-py3-none-any.whl#sha256=060ca5c9f7ba57a08a1219e547b269fadf125ae25b06b9fa7f66768efb652d6d -# pip numpydoc @ https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 -# pip pytest-cov @ https://files.pythonhosted.org/packages/fe/1f/9ec0ddd33bd2b37d6ec50bb39155bca4fe7085fa78b3b434c05459a860e3/pytest_cov-4.0.0-py3-none-any.whl#sha256=2feb1b751d66a8bd934e5edfa2e961d11309dc37b73b0eabe73b5945fee20f6b -# pip pytest-forked @ https://files.pythonhosted.org/packages/0c/36/c56ef2aea73912190cdbcc39aaa860db8c07c1a5ce8566994ec9425453db/pytest_forked-1.4.0-py3-none-any.whl#sha256=bbbb6717efc886b9d64537b41fb1497cfaf3c9601276be8da2cccfea5a3c8ad8 -# pip pytest-xdist @ https://files.pythonhosted.org/packages/21/08/b1945d4b4986eb1aa10cf84efc5293bba39da80a2f95db3573dd90678408/pytest_xdist-2.5.0-py3-none-any.whl#sha256=6fe5c74fec98906deb8f2d2b616b5c782022744978e7bd4695d39c8f42d0ce65 +# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/80/b3/1beac14a88654d2e5120d0143b49be5ad450b86eb1963523d8dbdcc51eb2/sphinxcontrib_qthelp-1.0.7-py3-none-any.whl#sha256=e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182 +# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/38/24/228bb903ea87b9e08ab33470e6102402a644127108c7117ac9c00d849f82/sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl#sha256=326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7 +# pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f +# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 +# pip urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.whl#sha256=450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d +# pip jinja2 @ 
https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl#sha256=bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526 +# pip pytest @ https://files.pythonhosted.org/packages/51/ff/f6e8b8f39e08547faece4bd80f89d5a8de68a38b2d179cc1c4490ffa3286/pytest-7.4.4-py3-none-any.whl#sha256=b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8 +# pip python-dateutil @ https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 +# pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f +# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8 +# pip pooch @ https://files.pythonhosted.org/packages/f4/72/8ae0f1ba4ce6a4f6d4d01a60a9fdf690fde188c45c1872b0b4ddb0607ace/pooch-1.8.1-py3-none-any.whl#sha256=6b56611ac320c239faece1ac51a60b25796792599ce5c0b1bb87bf01df55e0a9 +# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 +# pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7 +# pip sphinx @ https://files.pythonhosted.org/packages/b4/fa/130c32ed94cf270e3d0b9ded16fb7b2c8fea86fa7263c29a696a30c1dde7/sphinx-7.3.7-py3-none-any.whl#sha256=413f75440be4cacf328f580b4274ada4565fb2187d696a84970c23f77b64d8c3 +# pip numpydoc @ https://files.pythonhosted.org/packages/f0/fa/dcfe0f65660661db757ee9ebd84e170ff98edd5d80235f62457d9088f85f/numpydoc-1.7.0-py3-none-any.whl#sha256=5a56419d931310d79a06cfc2a126d1558700feeb9b4f3d8dcae1a8134be829c9 diff --git a/build_tools/azure/py38_conda_defaults_openblas_environment.yml b/build_tools/azure/pymin_conda_defaults_openblas_environment.yml similarity index 58% rename from build_tools/azure/py38_conda_defaults_openblas_environment.yml rename to build_tools/azure/pymin_conda_defaults_openblas_environment.yml index b84fab29dda90..3a8379e28068e 100644 --- a/build_tools/azure/py38_conda_defaults_openblas_environment.yml +++ b/build_tools/azure/pymin_conda_defaults_openblas_environment.yml @@ -4,20 +4,20 @@ channels: - defaults dependencies: - - python=3.8 - - numpy=1.17.3 # min + - python=3.9 + - numpy=1.21 - blas[build=openblas] - - scipy=1.3.2 # min - - cython - - joblib - - threadpoolctl=2.2.0 - - matplotlib=3.1.3 # min - - pandas + - scipy=1.7 + - cython=3.0.10 # min + - joblib=1.2.0 # min + - matplotlib=3.3.4 # min - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow - - codecov - pytest-cov - - coverage=6.2 + - coverage - ccache + - pip + - pip: + - 
threadpoolctl==3.1.0 # min diff --git a/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock new file mode 100644 index 0000000000000..6e46719df47c4 --- /dev/null +++ b/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock @@ -0,0 +1,99 @@ +# Generated by conda-lock. +# platform: linux-64 +# input_hash: 7d61cf4d650f87956531ca703b2ac2eabd6d427b07664416d5420eb73b39bdf1 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d79366c90128f5dc444be8 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.3.11-h06a4308_0.conda#08529eb3504712baabcbda266a19feb7 +https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran5-11.2.0-h1234567_1.conda#36a01a8c30e0cadf0d3e842c50b73f3b +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-11.2.0-h00389a5_1.conda#7429b67ab7b1d7cb99b9d1f3ddaec6e3 +https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd +https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 +https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297 +https://repo.anaconda.com/pkgs/main/linux-64/expat-2.6.2-h6a678d5_0.conda#55049db2772dae035f6b8a95f72b5970 +https://repo.anaconda.com/pkgs/main/linux-64/fftw-3.3.9-h5eee18b_2.conda#db1df41113accc18ec59a99f1631bfcd +https://repo.anaconda.com/pkgs/main/linux-64/icu-73.1-h6a678d5_0.conda#6d09df641fc23f7d277a04dc7ea32dd4 +https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h5eee18b_1.conda#ac373800fda872108412d1ccfe3fa572 +https://repo.anaconda.com/pkgs/main/linux-64/lerc-3.0-h295c915_0.conda#b97309770412f10bed8d9448f6f98f87 +https://repo.anaconda.com/pkgs/main/linux-64/libdeflate-1.17-h5eee18b_1.conda#82831ef0b6c9595382d74e0c281f6742 +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0 +https://repo.anaconda.com/pkgs/main/linux-64/libiconv-1.16-h5eee18b_3.conda#197b1a0886a31fccab2167340528eebc +https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.21-h043d6bf_0.conda#7f7324dcc3c4761a14f3e4ac443235a7 +https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 +https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.3.2-h5eee18b_0.conda#9179fc7baefa1e027f572edbc519d805 +https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518dcadd6aaee9aae47ba9a671553 +https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.4-h6a678d5_1.conda#2ee58861f2b92b868ce761abb831819d +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_1.conda#d1d1fc47640fe0d9f7fa64c0a054bfd8 
+https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25 +https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e +https://repo.anaconda.com/pkgs/main/linux-64/libcups-2.4.2-h2d74bed_1.conda#3f265c2172a9e8c90a74037b6fa13685 +https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20230828-h5eee18b_0.conda#850eb5a9d2d7d3c66cce12e84406ca08 +https://repo.anaconda.com/pkgs/main/linux-64/libllvm14-14.0.6-hdb19cb5_3.conda#aefea2b45cf32f12b4f1ffaa70aa3201 +https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.39-h5eee18b_0.conda#f6aee38184512eb05b06c2e94d39ab22 +https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.10.4-hfdd30dd_2.conda#ff7a0e3b92afb3c99b82c9f0ba8b5670 +https://repo.anaconda.com/pkgs/main/linux-64/pcre2-10.42-hebb0a14_1.conda#727e15c3cfa02b032da4eb0c1123e977 +https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597 +https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.5-hc292b87_2.conda#3b7fe809e5b429b4f90fe064842a2370 +https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.12.1-h4a9f257_0.conda#bdc7b5952e9c5dca01bc2f4ccef2f974 +https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.20.1-h143b758_1.conda#cf1accc86321fa25d6b978cc748039ae +https://repo.anaconda.com/pkgs/main/linux-64/libclang13-14.0.6-default_he11475f_1.conda#44890feda1cf51639d9c94afbacce011 +https://repo.anaconda.com/pkgs/main/linux-64/libglib-2.78.4-hdc74915_0.conda#2f6d27741e931d5b6ba56e1a1312aaf0 +https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.5.1-h6a678d5_0.conda#235a671f74f0c4ecad9f9b3b107e3566 +https://repo.anaconda.com/pkgs/main/linux-64/libxkbcommon-1.0.1-h5eee18b_1.conda#888b2e8f1bbf21017c503826e2d24b50 +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e +https://repo.anaconda.com/pkgs/main/linux-64/cyrus-sasl-2.1.28-h52b45da_1.conda#d634af1577e4008f9228ae96ce671c44 +https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.14.1-h4c34cd2_2.conda#f0b472f5b544f8d57beb09ed4a2932e1 +https://repo.anaconda.com/pkgs/main/linux-64/glib-tools-2.78.4-h6a678d5_0.conda#3dbe6227cd59818dca9afb75ccb70708 +https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff +https://repo.anaconda.com/pkgs/main/linux-64/libclang-14.0.6-default_hc6dbbc7_1.conda#8f12583c4027b2861cff470f6b8837c4 +https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.17-hdbd6064_0.conda#6bed363e25859faff66bf546a11c10e8 +https://repo.anaconda.com/pkgs/main/linux-64/openjpeg-2.4.0-h3ad879b_0.conda#86baecb47ecaa7f7ff2657a1f03b90c9 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.19-h955ad1f_1.conda#4b453281859c293c9d577271f3b18a0d +https://repo.anaconda.com/pkgs/main/linux-64/certifi-2024.2.2-py39h06a4308_0.conda#2bc1db9166ecbb968f61252e6f08c2ce +https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab +https://repo.anaconda.com/pkgs/main/linux-64/cython-3.0.10-py39h5eee18b_0.conda#1419a658ed2b4d5c3ac1964f33143b64 +https://repo.anaconda.com/pkgs/main/linux-64/exceptiongroup-1.2.0-py39h06a4308_0.conda#960e2cb83ac5134df8e593a130aa11af 
+https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 +https://repo.anaconda.com/pkgs/main/linux-64/glib-2.78.4-h6a678d5_0.conda#045ff487547f7b2b7ff01648681b8ebe +https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 +https://repo.anaconda.com/pkgs/main/linux-64/joblib-1.2.0-py39h06a4308_0.conda#ac1f5687d70aa1128cbecb26bc9e559d +https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.4-py39h6a678d5_0.conda#3d57aedbfbd054ce57fb3c1e4448828c +https://repo.anaconda.com/pkgs/main/linux-64/mysql-5.7.24-h721c034_2.conda#dfc19ca2466d275c4c1f73b62c57f37b +https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.21.6-py39h375b286_1.conda#0061d9193658774ab79fc85d143a94fc +https://repo.anaconda.com/pkgs/main/linux-64/packaging-23.2-py39h06a4308_0.conda#b3f88f45f31bde016e49be3e941e5272 +https://repo.anaconda.com/pkgs/main/linux-64/pillow-10.3.0-py39h5eee18b_0.conda#b346d6c71267c1553b6c18d3db5fdf6d +https://repo.anaconda.com/pkgs/main/linux-64/pluggy-1.0.0-py39h06a4308_1.conda#fb4fed11ed43cf727dbd51883cc1d9fa +https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py39h06a4308_0.conda#6c89bf6d2fdf6d24126e34cb83fd10f1 +https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-3.0.9-py39h06a4308_0.conda#3a0537468e59760404f63b4f04369828 +https://repo.anaconda.com/pkgs/main/linux-64/pyqt5-sip-12.13.0-py39h5eee18b_0.conda#256840c3841b52346ea5743be8490ede +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-69.5.1-py39h06a4308_0.conda#3eb144d481b39c0fbbced789dd9b76b3 +https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 +https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a +https://repo.anaconda.com/pkgs/main/linux-64/tomli-2.0.1-py39h06a4308_0.conda#b06dffe7ddca2645ed72f5116f0a087d +https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.3.3-py39h5eee18b_0.conda#9c4bd985bb8adcd12f47e790e95a9333 +https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py39h06a4308_0.conda#40bb60408c7433d767fd8c65b35bc4a0 +https://repo.anaconda.com/pkgs/main/linux-64/coverage-7.2.2-py39h5eee18b_0.conda#e9da151b7e1f56be2cb569c65949a1d2 +https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4 +https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.1-h5eee18b_1.conda#f2f26e6f869b5d87f41bd059fae47c3e +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.21.6-py39hac523dd_1.conda#f379f92039f666828a193fadd18c9819 +https://repo.anaconda.com/pkgs/main/linux-64/pip-24.0-py39h06a4308_0.conda#7f8ce3af15cfecd12e4dda8c5cef5fb7 +https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.4.0-py39h06a4308_0.conda#99d92a7a39f7e615de84f8cc5606c49a +https://repo.anaconda.com/pkgs/main/linux-64/python-dateutil-2.9.0post0-py39h06a4308_0.conda#bb2c65e53e610ec258e03771cd79ad17 +https://repo.anaconda.com/pkgs/main/linux-64/sip-6.7.12-py39h6a678d5_0.conda#6988a3e12fcacfedcac523c1e4c3167c +https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1.conda#afd9cbe949d670d24cc0a007aaec1fe1 +https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.3.4-py39h62a2d02_0.conda#dbab28222c740af8e21a3e5e2882c178 +https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.1.0-py39h06a4308_1.conda#8f41fce21670b120bf7fa8a7883380d9 +https://repo.anaconda.com/pkgs/main/linux-64/pytest-xdist-3.5.0-py39h06a4308_0.conda#e1d7ffcb1ee2ed9a84800f5c4bbbd7ae 
+https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.7.3-py39hf838250_2.conda#0667ea5ac14d35e26da19a0f068739da +https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py39h79cecc1_0.conda#afc634da8b81dc504179d53d334e6e55 +https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h53bd1ea_10.conda#bd0c79e82df6323f638bdcb871891b61 +https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.10-py39h6a678d5_0.conda#52da5ff9b1144b078d2f41bab0b213f2 +https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.3.4-py39h06a4308_0.conda#384fc5e01ebfcf30e7161119d3029b5a +# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b diff --git a/build_tools/azure/py38_conda_forge_mkl_environment.yml b/build_tools/azure/pymin_conda_forge_mkl_environment.yml similarity index 82% rename from build_tools/azure/py38_conda_forge_mkl_environment.yml rename to build_tools/azure/pymin_conda_forge_mkl_environment.yml index 847d8f6e471c7..fbad1d5bd42a8 100644 --- a/build_tools/azure/py38_conda_forge_mkl_environment.yml +++ b/build_tools/azure/pymin_conda_forge_mkl_environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - - python=3.8 + - python=3.9 - numpy - blas[build=mkl] - scipy @@ -12,10 +12,12 @@ dependencies: - joblib - threadpoolctl - matplotlib - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow - - codecov + - pip + - ninja + - meson-python - pytest-cov - coverage - wheel diff --git a/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock new file mode 100644 index 0000000000000..8f0a473c031ca --- /dev/null +++ b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock @@ -0,0 +1,118 @@ +# Generated by conda-lock. 
+# platform: win-64 +# input_hash: 4a2ac6360285edd6c1e8182dd51ef698c0041fa9843e4ad9d9bc9dec6a7c8d1d +@EXPLICIT +https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2024.2.2-h56e8100_0.conda#63da060240ab8087b60d1357051ea7d6 +https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2024.1.0-h57928b3_965.conda#c66eb2fd33b999ccc258aef85689758e +https://conda.anaconda.org/conda-forge/win-64/mkl-include-2024.1.0-h66d3029_692.conda#60233966dc7c0261c9a443120b43c477 +https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2#b0309b72560df66f71a9d5e34a5efdfa +https://conda.anaconda.org/conda-forge/win-64/python_abi-3.9-4_cp39.conda#948b0d93d4ab1372d8fd45e1560afd47 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_0.tar.bz2#72608f6cd3e5898229c3ea16deb1ac43 +https://conda.anaconda.org/conda-forge/win-64/m2w64-gmp-6.1.0-2.tar.bz2#53a1c73e1e3d185516d7e3af177596d9 +https://conda.anaconda.org/conda-forge/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2#774130a326dee16f1ceb05cc687ee4f0 +https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.38.33130-h82b7239_18.conda#8be79fdd2725ddf7bbf8a27a4c1f79ba +https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2#4289d80fb4d272f1f3b56cfe87ac90bd +https://conda.anaconda.org/conda-forge/win-64/vc-14.3-hcf57466_18.conda#20e1e652a4c740fa719002a8449994a2 +https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.38.33130-hcb4865c_18.conda#10d42885e3ed84e575b454db30f1aa93 +https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-hcfcfb64_5.conda#26eb8ca6ea332b675e11704cce84a3be +https://conda.anaconda.org/conda-forge/win-64/icu-73.2-h63175ca_0.conda#0f47d9e3192d9e09ae300da0d28e0f56 +https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2#1900cb3cab5055833cfddb0ba233b074 +https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.1.0-hcfcfb64_1.conda#f77f319fb82980166569e1280d5b2864 +https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.20-hcfcfb64_0.conda#b12b5bde5eb201a1df75e49320cc938a +https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2#2c96d1b6915b408893f9472569dee135 +https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-hcfcfb64_2.conda#e1eb10b1cca179f2baa3601e4efc8712 +https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-3.0.0-hcfcfb64_1.conda#3f1b948619c45b1ca714d60c7389092c +https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.4-h8ffe710_1.tar.bz2#04286d905a0dcb7f7d4a12bdfe02516d +https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.45.3-hcfcfb64_0.conda#73f5dc8e2d55d9a1e14b11f49c3b4a28 +https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.4.0-hcfcfb64_0.conda#abd61d0ab127ec5cd68f62c2969e6f34 +https://conda.anaconda.org/conda-forge/win-64/libzlib-1.2.13-hcfcfb64_5.conda#5fdb9c6a113b6b6cb5e517fd972d5f41 +https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2#066552ac6b907ec6d72c0ddab29050dc +https://conda.anaconda.org/conda-forge/win-64/ninja-1.12.1-hc790b64_0.conda#a557dde55343e03c68cd7e29e7f87279 +https://conda.anaconda.org/conda-forge/win-64/openssl-3.3.0-hcfcfb64_0.conda#a6c544c9f060740c625dbf6d92cf3495 +https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-hfa6e2cd_3.tar.bz2#e2da8758d7d51ff6aa78a14dfb9dbed4 +https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h5226925_1.conda#fc048363eb8f03cd1737600a5d08aafe 
+https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2#515d77642eaa3639413c6b1bc3f94219 +https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.2-heb0366b_0.conda#6e8b0f22b4eef3b3cb3849bb4c3d47f9 +https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-hcfcfb64_1.conda#19ce3e1dacc7912b3d6ff40690ba9ae0 +https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-hcfcfb64_1.conda#71e890a0b361fd58743a13f77e1506b7 +https://conda.anaconda.org/conda-forge/win-64/libintl-0.22.5-h5728263_2.conda#aa622c938af057adc119f8b8eecada01 +https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.43-h19919ed_0.conda#77e398acc32617a0384553aea29e866b +https://conda.anaconda.org/conda-forge/win-64/libvorbis-1.3.7-h0e60522_0.tar.bz2#e1a22282de0169c93e4ffe6ce6acc212 +https://conda.anaconda.org/conda-forge/win-64/libxml2-2.12.7-h283a6d9_0.conda#1451be68a5549561979125c1827b79ed +https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de +https://conda.anaconda.org/conda-forge/win-64/pcre2-10.43-h17e33f8_0.conda#d0485b8aa2cedb141a7bd27b4efa4c9c +https://conda.anaconda.org/conda-forge/win-64/python-3.9.19-h4de0772_0_cpython.conda#b6999bc275e0e6beae7b1c8ea0be1e85 +https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.6-h0ea2cb4_0.conda#9a17230f95733c04dc40a2b1e5491d74 +https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-hcfcfb64_1.conda#0105229d7c5fabaa840043a86c10ec64 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/win-64/cython-3.0.10-py39h99910a6_0.conda#8ebc2fca8a6840d0694f37e698f4e59c +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-hdaf720e_2.conda#3761b23693f768dc75a8fd0a73ca053f +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.5-py39h1f6ef14_1.conda#4fc5bd0a7b535252028c647cc27d6c87 +https://conda.anaconda.org/conda-forge/win-64/libclang13-18.1.5-default_hf64faad_0.conda#8a662434c6be1f40e2d5d2506d05a41d +https://conda.anaconda.org/conda-forge/win-64/libglib-2.80.2-h0df6a38_0.conda#ef9ae80bb2a15aee7a30180c057678ea +https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.10.0-default_h2fffb23_1000.conda#ee944f0d41d9e2048f9d7492c1623ca3 +https://conda.anaconda.org/conda-forge/win-64/libintl-devel-0.22.5-h5728263_2.conda#a2ad82fae23975e4ccbfab2847d31d48 +https://conda.anaconda.org/conda-forge/win-64/libtiff-4.6.0-hddb2be6_3.conda#6d1828c9039929e2f185c5fa9d133018 +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 
+https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/win-64/tornado-6.4-py39ha55989b_0.conda#d8f52e8e1d02f9a5901f9224e2ddf98f +https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.1.0-py39ha55989b_0.conda#20ec896e8d97f2ff8be1124e624dc8f2 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.11-hcd874cb_0.conda#c46ba8712093cb0114404ae8a7582e1a +https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-hcfcfb64_1.conda#f47f6db2528e38321fb00ae31674c133 +https://conda.anaconda.org/conda-forge/win-64/coverage-7.5.1-py39ha55e580_0.conda#e8f43ea91f0f17d92d5575cfab41a42f +https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.80.2-h2f9d560_0.conda#42fc785d9db7ab051a206fbf882ecf2e +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/win-64/lcms2-2.16-h67d730c_0.conda#d3592435917b62a8becff3a60db674f6 +https://conda.anaconda.org/conda-forge/win-64/libxcb-1.15-hcd874cb_0.conda#090d91b69396f14afef450c285f9758c +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.2-h3d672ee_0.conda#7e7099ad94ac3b599808950cec30ad4e +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/win-64/sip-6.7.12-py39h99910a6_0.conda#0cc5774390ada632ed7975203057c91c +https://conda.anaconda.org/conda-forge/win-64/tbb-2021.12.0-h91493d7_0.conda#21745fdd12f01b41178596143cbecffd +https://conda.anaconda.org/conda-forge/win-64/fonttools-4.51.0-py39ha55989b_0.conda#5d19302bab29e347116b743e793aa7d6 +https://conda.anaconda.org/conda-forge/win-64/glib-2.80.2-h0df6a38_0.conda#a728ca6f04c33ecb0f39eeda5fbd0e23 
+https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/win-64/mkl-2024.1.0-h66d3029_692.conda#b43ec7ed045323edeff31e348eea8652 +https://conda.anaconda.org/conda-forge/win-64/pillow-10.3.0-py39h9ee4981_0.conda#6d69d57c41867acc162ef0205a8efaef +https://conda.anaconda.org/conda-forge/win-64/pyqt5-sip-12.12.2-py39h99910a6_5.conda#dffbcea794c524c471772a5f697c2aea +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/win-64/gstreamer-1.24.3-h5006eae_0.conda#8c8959a520ef4911271fbf2cb2dfc3fe +https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-22_win64_mkl.conda#65c56ecdeceffd6c32d3d54db7e02c6e +https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2024.1.0-h57928b3_692.conda#9b3d1d4916a56fd32460f6fe784dcb51 +https://conda.anaconda.org/conda-forge/win-64/gst-plugins-base-1.24.3-hba88be7_0.conda#1fa879c7b4868c58830762b6fac0075d +https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-22_win64_mkl.conda#336c93ab102846c6131cf68e722a68f1 +https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-22_win64_mkl.conda#c752cc2af9f3d8d7b2fdebb915a33ef7 +https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-22_win64_mkl.conda#db33ffa4bae1d2f6d5602afaa048bf6b +https://conda.anaconda.org/conda-forge/win-64/numpy-1.26.4-py39hddb5d58_0.conda#6e30ff8f2d3f59f45347dfba8bc22a04 +https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-hcef0176_21.conda#76544d3dfeff8fd52250df168cb0005b +https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-22_win64_mkl.conda#adeb834f3b7b06f3d77cd90b7c9d08f0 +https://conda.anaconda.org/conda-forge/win-64/contourpy-1.2.1-py39h1f6ef14_0.conda#03e25c6bae87f4f9595337255b44b0fb +https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.9-py39hb77abff_5.conda#5ed899124a51958336371ff01482b8fd +https://conda.anaconda.org/conda-forge/win-64/scipy-1.13.0-py39h1a10956_1.conda#5624ccefd670072fc86b2cd4ffdc6c44 +https://conda.anaconda.org/conda-forge/win-64/blas-2.122-mkl.conda#aee642435696de144ddf91dc02101cf8 +https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.8.4-py39hf19769e_0.conda#7836c3dc5814f6d55a7392657c576e88 +https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.8.4-py39hcbf5309_0.conda#cc66c372d5eb745665da06ce56b7d72b diff --git a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml similarity index 78% rename from build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml rename to build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml index 1547bdb8b902b..855909a2c262a 100644 --- a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml +++ b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - - python=3.8 + - python=3.9 - numpy - blas[build=openblas] - scipy @@ -14,7 +14,12 @@ dependencies: - matplotlib - pandas - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow + - pip + - ninja + - meson-python + - sphinx + - numpydoc - ccache diff --git 
a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock new file mode 100644 index 0000000000000..1a4d0feae1773 --- /dev/null +++ b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock @@ -0,0 +1,205 @@ +# Generated by conda-lock. +# platform: linux-64 +# input_hash: a64ed7d3cc839a12cb1faa238a89d4aec55abc43d335791f0e8422f5722ff662 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 +https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 +https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 +https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-hd590300_0.conda#c0f3abb4a16477208bbd43a39bd56f18 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 +https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 
+https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_0.conda#5d801a4906adc712d480afc362623b59 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.5-hb77312f_0.conda#efd221d3668077ca067a206269418dec +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.5-ha31de31_0.conda#b923cdb6e567ada84f991ffcc5848afb +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 +https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca +https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.5-default_h5d6823c_0.conda#60c39a00b694c98da03f67a3ba1d7499 +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda#b7f5c092b8f9800150d998a71b76d5a1 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 +https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88 +https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae 
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py39hd1e30aa_0.conda#79f5dd8778873faa54e8f7b2729fe8a6 +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.3-haf2f30d_0.conda#f3df87cc9ef0b5113bff55aefcbcafd5 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0 
+https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h474f0d3_0.conda#aa265f5697237aa13cc10f53fa8acc4f +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39h7633fee_0.conda#bdc188e59857d6efab332714e0d01d93 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.3-h9ad1361_0.conda#8fb0e954c616bb0f9389efac4b4ed44b +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py39hddac248_0.conda#259c4e76e6bda8888aefc098ae1ba749 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.0-py39haf93ffa_1.conda#57ce54e228e3fbc60e42fa368eff3251 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39he9076e7_0.conda#1919384a8420e7bb25f6c3a582e0857c +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39hda80f44_0.conda#f225666c47726329201b604060f1436c +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39hf3d152e_0.conda#c66d2da2669fddc657b679bccab95775 +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.7.0-pyhd8ed1ab_0.conda#1ad3afced398492586ca1bef70328be4 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1 +https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e diff --git a/build_tools/azure/pypy3_environment.yml b/build_tools/azure/pypy3_environment.yml index b5cea70d70bad..285f1b0d51d17 100644 --- a/build_tools/azure/pypy3_environment.yml +++ b/build_tools/azure/pypy3_environment.yml @@ -14,6 +14,9 @@ dependencies: - threadpoolctl - matplotlib - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist + - pip + - ninja + - meson-python - ccache diff --git a/build_tools/azure/pypy3_linux-64_conda.lock b/build_tools/azure/pypy3_linux-64_conda.lock index 5c7eec061cdb7..ab6a908edf340 100644 --- 
a/build_tools/azure/pypy3_linux-64_conda.lock +++ b/build_tools/azure/pypy3_linux-64_conda.lock @@ -1,92 +1,103 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 42c6166c936ee35159a6d1b5d7b6a9b30df5242f836e02d76e238e2d0f1faa43 +# input_hash: c4b15c5bfeffe4d558e4ece0c996e6cc04c00369326c72d19780ffc0209bd591 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.9.24-ha878542_0.tar.bz2#41e4e87062433e283696cf384f952ef6 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-12.2.0-h337968e_19.tar.bz2#164b4b1acaedc47ee7e658ae6b308ca3 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-12.2.0-h46fd767_19.tar.bz2#1030b1f38c129f2634eae026f704fe60 -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-3_pypy39_pp73.conda#6f23be0f8f1e4871998437b188425ea3 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2022f-h191b570_0.tar.bz2#e366350e2343a798e29833286abe2560 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-12.2.0-h69a702a_19.tar.bz2#cd7a806282c16e1f2d39a7e80d3a3e0d +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_pypy39_pp73.conda#c1b2f29111681a4036ed21eaa3f44620 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-12.2.0-h65d4601_19.tar.bz2#e4c94f80aef025c17ab0828cd85ef535 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-h27087fc_0.tar.bz2#c4fbad8d4bddeb3c085f18cbf97fbfad -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.14-h166bdaf_0.tar.bz2#fc84a0446e4e4fb882e78d786cfb9734 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.21-pthreads_h78a6416_3.tar.bz2#8c5963a49b6035c40646a763293fbb35 
-https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.4-h166bdaf_0.tar.bz2#ac2ccf7323d21f2994e4d1f5da664f37 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.3-h27087fc_1.tar.bz2#4acfc691e64342b9dae57cf2adc63238 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.7-h166bdaf_0.tar.bz2#d1ad1824c71e67dea42f07e06cd177dc +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-hd590300_0.conda#c0f3abb4a16477208bbd43a39bd56f18 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.9-h7f98852_0.tar.bz2#bf6f803a544f26ebbdc3bfff272eb179 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_openblas.tar.bz2#d9b7a8639171f6c6fa0a983edabcfe2b -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.40.0-h753d276_0.tar.bz2#2e5f9a37d487e1019fd4d8113adb2f9f -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-15.0.5-he0ac6c6_0.tar.bz2#5c4783b468153a1d8f33874c5bb55864 -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.21-pthreads_h320a7e8_3.tar.bz2#29155b9196b9d78022f11d86733e25a7 -https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 
-https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h6239696_4.tar.bz2#adcf0be7897e73e312bd24353b613f74 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/ccache-4.7.3-h2599c5e_0.tar.bz2#4feea9466084c6948bd59539f1c0bb72 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_0.tar.bz2#4e54cbfc47b8c74c2ecc1e7730d8edce +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb https://conda.anaconda.org/conda-forge/linux-64/gdbm-1.18-h0a1914f_2.tar.bz2#b77bc399b07a19c00fe12fdc95ee0297 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_openblas.tar.bz2#20bae26d0a1db73f758fc3754cab4719 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_openblas.tar.bz2#955d993f41f9354bf753d29864ea20ad -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h55922b4_4.tar.bz2#901791f0ec7cddc8714e76e273013a91 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.40.0-h4ff8645_0.tar.bz2#bb11803129cbbb53ed56f9506ff74145 -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.14-h6ed2654_0.tar.bz2#dcc588839de1445d90995a0a2c4f3a39 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_openblas.tar.bz2#823ceb5567e1a595deb643fcd17aed5a -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h7d73246_1.tar.bz2#a11b4df9271a8d7917686725aa04c8f2 -https://conda.anaconda.org/conda-forge/linux-64/pypy3.9-7.3.9-hd671c94_6.tar.bz2#5e87580e0dbd1a1a58b59d920b778537 -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-16_linux64_openblas.tar.bz2#519562d6176dab9c2ab9a8336a14c8e7 -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.12-0_73_pypy.tar.bz2#12c038a66ca998f24c381de990e942b6 -https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f 
-https://conda.anaconda.org/conda-forge/linux-64/blas-2.116-openblas.tar.bz2#02f34bcf0aceb6fae4c4d1ecb71c852a -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d +https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.5-ha31de31_0.conda#b923cdb6e567ada84f991ffcc5848afb +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.45.3-h2c6b66d_0.conda#be7d70f2db41b674733667bdd69bd000 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f +https://conda.anaconda.org/conda-forge/linux-64/ccache-4.9.1-h1fcd64f_0.conda#3620f564bcf28c3524951b6f64f5c5ac +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/linux-64/pypy3.9-7.3.15-h9557127_1.conda#0862f2ce457660f1060225d96d468237 +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.18-1_73_pypy.conda#6e0143cd3dd940d3004cd857e37ccd81 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.32-py39h0e26352_1.tar.bz2#0806e9d3dc6d425beb030a0ed241e6bb -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py39h2865249_1.tar.bz2#6b7e75ba141872a00154f312d43d9a8c +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39hc10206b_0.conda#60c2d58b33a21c32f469e3f6a9eb7e4b +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39ha90811c_1.conda#25edffabcb0760fc1821597c4ce920db +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.23.5-py39h4fa106f_0.conda#e9f9bbb648b5cdf0b34b7d1a1e62469e -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py39hc6341f6_3.tar.bz2#34b52d9f57e05e9600dfe39fee936ff8 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.9-0_pypy39.tar.bz2#4f9efe821e2c2886da9c2fdc8b480738 -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h6dedee3_0.conda#557d64563e84ff21b14f586c7f662b7f +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90a76f3_0.conda#799e6519cfffe2784db27b1db2ef33f3 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.15-1_pypy39.conda#a418a6c16bd6f7ed56b92194214791a0 +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py39h4d8b378_1.tar.bz2#28cd3041080bd963493b35f7ac64cb12 -https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py39h4d8b378_0.tar.bz2#44eea5be274d005065d87df9cf2a9234 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.6-py39h2865249_0.tar.bz2#96cd622e9709839879768bf1db2a7058 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.38.0-py39h4d8b378_1.tar.bz2#32eaab5fec9e6108cb431e7eec99d0cc -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.8.1-py39hec0f089_3.tar.bz2#6df34a135e04f0b91a90ef20a70f7dde 
-https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.6.2-py39hd8616df_0.tar.bz2#03f52764fd4319bbbde7e62c84fc2e11 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39h81e4ded_2.tar.bz2#6fde94a3541607887bb0572be1991d9d -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.6.2-py39h4162558_0.tar.bz2#f392ad75fed5d80854323688aacc2bab -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hf860d4a_0.conda#e7fded713fb466e1e0670afce1761b47 +https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hf860d4a_0.conda#f699157518d28d00c87542b4ec1273be +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39ha90811c_0.conda#07ed14c8326da42356514bcbc0b04802 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py39hf860d4a_0.conda#63421b4dd7222fad555e34ec9af015a1 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.12.0-py39h6dedee3_2.conda#6c5d74bac41838f4377dfd45085e1fec +https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39h5fd064f_0.conda#04676d2a49da3cb608af77e04b796ce1 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39h4e7d633_0.conda#58272019e595dde98d0844ae3ebf0cfe +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39h4162558_0.conda#b0f7702a174422ff1db58190495fd766 diff --git a/build_tools/azure/pytest-pyodide.js b/build_tools/azure/pytest-pyodide.js new file mode 100644 index 0000000000000..c195940ce3b5b --- /dev/null +++ b/build_tools/azure/pytest-pyodide.js @@ -0,0 +1,53 @@ +const { opendir } = 
require('node:fs/promises'); +const { loadPyodide } = require("pyodide"); + +async function main() { + let exit_code = 0; + try { + global.pyodide = await loadPyodide(); + let pyodide = global.pyodide; + const FS = pyodide.FS; + const NODEFS = FS.filesystems.NODEFS; + + let mountDir = "/mnt"; + pyodide.FS.mkdir(mountDir); + pyodide.FS.mount(pyodide.FS.filesystems.NODEFS, { root: "." }, mountDir); + + await pyodide.loadPackage(["micropip"]); + await pyodide.runPythonAsync(` + import glob + import micropip + + wheels = glob.glob('/mnt/dist/*.whl') + wheels = [f'emfs://{wheel}' for wheel in wheels] + print(f'installing wheels: {wheels}') + await micropip.install(wheels); + + pkg_list = micropip.list() + print(pkg_list) + `); + + // Pyodide is built without OpenMP, need to set environment variable to + // skip related test + await pyodide.runPythonAsync(` + import os + os.environ['SKLEARN_SKIP_OPENMP_TEST'] = 'true' + `); + + await pyodide.runPythonAsync("import micropip; micropip.install('pytest')"); + let pytest = pyodide.pyimport("pytest"); + let args = process.argv.slice(2); + console.log('pytest args:', args); + exit_code = pytest.main(pyodide.toPy(args)); + } catch (e) { + console.error(e); + // Arbitrary exit code here. I have seen this code reached instead of a + // Pyodide fatal error sometimes + exit_code = 66; + + } finally { + process.exit(exit_code); + } +} + +main(); diff --git a/build_tools/azure/python_nogil_lock.txt b/build_tools/azure/python_nogil_lock.txt index 49952a2918837..03cd4f2e0c346 100644 --- a/build_tools/azure/python_nogil_lock.txt +++ b/build_tools/azure/python_nogil_lock.txt @@ -1,63 +1,72 @@ # -# This file is autogenerated by pip-compile with python 3.9 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: # # pip-compile --output-file=/scikit-learn/build_tools/azure/python_nogil_lock.txt /scikit-learn/build_tools/azure/python_nogil_requirements.txt # --index-url https://d1yxz45j0ypngg.cloudfront.net/ --extra-index-url https://pypi.org/simple -attrs==22.1.0 - # via pytest -cycler==0.11.0 +contourpy==1.1.1 + # via matplotlib +cycler==0.12.1 # via matplotlib -cython==0.29.27 +cython==3.0.10 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -execnet==1.9.0 +exceptiongroup==1.2.0 + # via pytest +execnet==2.0.2 # via pytest-xdist -iniconfig==1.1.1 +fonttools==4.50.0 + # via matplotlib +iniconfig==2.0.0 # via pytest -joblib==1.1.0 +joblib==1.3.2 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -kiwisolver==1.3.2 +kiwisolver==1.4.4 # via matplotlib -matplotlib==3.4.3 +matplotlib==3.6.2 + # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt +meson==1.4.0 + # via meson-python +meson-python==0.15.0 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -numpy==1.22.3 +ninja==1.11.1.1 + # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt +numpy==1.24.0 # via # -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt + # contourpy # matplotlib # scipy -packaging==21.3 - # via pytest -pillow==8.4.0 - # via matplotlib -pluggy==1.0.0 - # via pytest -py==1.11.0 - # via - # pytest - # pytest-forked -pyparsing==3.0.9 +packaging==24.0 # via # matplotlib - # packaging -pytest==6.2.5 + # pyproject-metadata + # pytest +pillow==9.5.0 + # via matplotlib +pluggy==1.4.0 + # via pytest +pyparsing==3.1.2 + # via matplotlib +pyproject-metadata==0.7.1 + # via meson-python +pytest==7.4.4 # via # -r 
/scikit-learn/build_tools/azure/python_nogil_requirements.txt - # pytest-forked # pytest-xdist -pytest-forked==1.4.0 - # via pytest-xdist -pytest-xdist==2.5.0 +pytest-xdist==3.5.0 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via matplotlib -scipy==1.8.1 +scipy==1.9.3 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt six==1.16.0 # via python-dateutil -threadpoolctl==3.1.0 +threadpoolctl==3.4.0 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -toml==0.10.2 - # via pytest +tomli==2.0.1 + # via + # meson-python + # pytest diff --git a/build_tools/azure/python_nogil_requirements.txt b/build_tools/azure/python_nogil_requirements.txt index 466ceb35d382e..2cebad9a03b25 100644 --- a/build_tools/azure/python_nogil_requirements.txt +++ b/build_tools/azure/python_nogil_requirements.txt @@ -11,5 +11,10 @@ scipy cython joblib threadpoolctl -pytest==6.2.5 +# TODO: somehow pytest 8 does not seem to work with meson editable +# install. Exit code is 5, i.e. no test collected +# This would be fixed by https://github.com/mesonbuild/meson-python/pull/569 +pytest<8 pytest-xdist +meson-python +ninja diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh index f2f4690f6633d..faf48e27efefb 100755 --- a/build_tools/azure/test_script.sh +++ b/build_tools/azure/test_script.sh @@ -2,14 +2,10 @@ set -e -# defines the show_installed_libraries function +# Defines the show_installed_libraries and activate_environment functions. source build_tools/shared.sh -if [[ "$DISTRIB" =~ ^conda.* ]]; then - source activate $VIRTUALENV -elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "debian-32" || "$DISTRIB" == "pip-nogil" ]]; then - source $VIRTUALENV/bin/activate -fi +activate_environment if [[ "$BUILD_REASON" == "Schedule" ]]; then # Enable global random seed randomization to discover seed-sensitive tests @@ -34,7 +30,8 @@ mkdir -p $TEST_DIR cp setup.cfg $TEST_DIR cd $TEST_DIR -python -c "import joblib; print(f'Number of cores: {joblib.cpu_count()}')" +python -c "import joblib; print(f'Number of cores (physical): \ +{joblib.cpu_count()} ({joblib.cpu_count(only_physical_cores=True)})')" python -c "import sklearn; sklearn.show_versions()" show_installed_libraries @@ -51,30 +48,9 @@ if [[ "$COVERAGE" == "true" ]]; then TEST_CMD="$TEST_CMD --cov-config='$COVERAGE_PROCESS_START' --cov sklearn --cov-report=" fi -if [[ -n "$CHECK_WARNINGS" ]]; then - TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning -Werror::numpy.VisibleDeprecationWarning" - - # numpy's 1.19.0's tostring() deprecation is ignored until scipy and joblib - # removes its usage - TEST_CMD="$TEST_CMD -Wignore:tostring:DeprecationWarning" - - # Ignore distutils deprecation warning, used by joblib internally - TEST_CMD="$TEST_CMD -Wignore:distutils\ Version\ classes\ are\ deprecated:DeprecationWarning" - - # In some case, exceptions are raised (by bug) in tests, and captured by pytest, - # but not raised again. This is for instance the case when Cython directives are - # activated: IndexErrors (which aren't fatal) are raised on out-of-bound accesses. - # In those cases, pytest instead raises pytest.PytestUnraisableExceptionWarnings, - # which we must treat as errors on the CI. 
- TEST_CMD="$TEST_CMD -Werror::pytest.PytestUnraisableExceptionWarning" -fi - if [[ "$PYTEST_XDIST_VERSION" != "none" ]]; then - TEST_CMD="$TEST_CMD -n$CPU_COUNT" -fi - -if [[ "$SHOW_SHORT_SUMMARY" == "true" ]]; then - TEST_CMD="$TEST_CMD -ra" + XDIST_WORKERS=$(python -c "import joblib; print(joblib.cpu_count(only_physical_cores=True))") + TEST_CMD="$TEST_CMD -n$XDIST_WORKERS" fi if [[ -n "$SELECTED_TESTS" ]]; then @@ -84,6 +60,15 @@ if [[ -n "$SELECTED_TESTS" ]]; then export SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all" fi +TEST_CMD="$TEST_CMD --pyargs sklearn" +if [[ "$DISTRIB" == "conda-pypy3" ]]; then + # Run only common tests for PyPy. Running the full test suite uses too + # much memory and causes the test to time out sometimes. See + # https://github.com/scikit-learn/scikit-learn/issues/27662 for more + # details. + TEST_CMD="$TEST_CMD.tests.test_common" +fi + set -x -eval "$TEST_CMD --pyargs sklearn" +eval "$TEST_CMD" set +x diff --git a/build_tools/azure/test_script_pyodide.sh b/build_tools/azure/test_script_pyodide.sh new file mode 100644 index 0000000000000..d1aa207f864a2 --- /dev/null +++ b/build_tools/azure/test_script_pyodide.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -e + +# We are using a pytest js wrapper script to run tests inside Pyodide. Maybe +# one day we can use a Pyodide venv instead but at the time of writing +# (2023-09-27) there is an issue with scipy.linalg in a Pyodide venv, see +# https://github.com/pyodide/pyodide/issues/3865 for more details. +node build_tools/azure/pytest-pyodide.js --pyargs sklearn --durations 20 --showlocals diff --git a/build_tools/azure/ubuntu_atlas_lock.txt b/build_tools/azure/ubuntu_atlas_lock.txt index 18a8bb167119f..d1674c678b254 100644 --- a/build_tools/azure/ubuntu_atlas_lock.txt +++ b/build_tools/azure/ubuntu_atlas_lock.txt @@ -1,39 +1,43 @@ # -# This file is autogenerated by pip-compile with python 3.8 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # # pip-compile --output-file=build_tools/azure/ubuntu_atlas_lock.txt build_tools/azure/ubuntu_atlas_requirements.txt # -attrs==22.1.0 - # via pytest -cython==0.29.32 +cython==3.0.10 # via -r build_tools/azure/ubuntu_atlas_requirements.txt -exceptiongroup==1.0.4 +exceptiongroup==1.2.1 # via pytest -execnet==1.9.0 +execnet==2.1.1 # via pytest-xdist -iniconfig==1.1.1 +iniconfig==2.0.0 # via pytest -joblib==1.1.1 +joblib==1.2.0 # via -r build_tools/azure/ubuntu_atlas_requirements.txt -packaging==21.3 - # via pytest -pluggy==1.0.0 +meson==1.4.0 + # via meson-python +meson-python==0.16.0 + # via -r build_tools/azure/ubuntu_atlas_requirements.txt +ninja==1.11.1.1 + # via -r build_tools/azure/ubuntu_atlas_requirements.txt +packaging==24.0 + # via + # meson-python + # pyproject-metadata + # pytest +pluggy==1.5.0 # via pytest -py==1.11.0 - # via pytest-forked -pyparsing==3.0.9 - # via packaging -pytest==7.2.0 +pyproject-metadata==0.8.0 + # via meson-python +pytest==7.4.4 # via # -r build_tools/azure/ubuntu_atlas_requirements.txt - # pytest-forked # pytest-xdist -pytest-forked==1.4.0 - # via pytest-xdist -pytest-xdist==2.5.0 +pytest-xdist==3.6.1 # via -r build_tools/azure/ubuntu_atlas_requirements.txt -threadpoolctl==2.0.0 +threadpoolctl==3.1.0 # via -r build_tools/azure/ubuntu_atlas_requirements.txt tomli==2.0.1 - # via pytest + # via + # meson-python + # pytest diff --git a/build_tools/azure/ubuntu_atlas_requirements.txt b/build_tools/azure/ubuntu_atlas_requirements.txt index 57413851e5329..805d84d4d0aac 100644 --- 
a/build_tools/azure/ubuntu_atlas_requirements.txt +++ b/build_tools/azure/ubuntu_atlas_requirements.txt @@ -1,8 +1,10 @@ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py -cython -joblib==1.1.1 # min -threadpoolctl==2.0.0 # min -pytest -pytest-xdist==2.5.0 +cython==3.0.10 # min +joblib==1.2.0 # min +threadpoolctl==3.1.0 # min +pytest<8 +pytest-xdist +ninja +meson-python diff --git a/build_tools/azure/upload_codecov.sh b/build_tools/azure/upload_codecov.sh index 274106cb19f75..0e87b2dafc8b4 100755 --- a/build_tools/azure/upload_codecov.sh +++ b/build_tools/azure/upload_codecov.sh @@ -2,15 +2,56 @@ set -e -# called when COVERAGE=="true" and DISTRIB=="conda" -export PATH=$HOME/miniconda3/bin:$PATH -source activate $VIRTUALENV - -# Need to run codecov from a git checkout, so we copy .coverage -# from TEST_DIR where pytest has been run -pushd $TEST_DIR -coverage combine --append -popd -cp $TEST_DIR/.coverage $BUILD_REPOSITORY_LOCALPATH - -codecov --root $BUILD_REPOSITORY_LOCALPATH -t $CODECOV_TOKEN || echo "codecov upload failed" +# Do not upload to codecov on forks +if [[ "$BUILD_REPOSITORY_NAME" != "scikit-learn/scikit-learn" ]]; then + exit 0 +fi + +# When we update the codecov uploader version, we need to update the checksums. +# The checksum for each codecov binary is available at +# https://uploader.codecov.io e.g. for linux +# https://uploader.codecov.io/v0.7.1/linux/codecov.SHA256SUM. + +# Instead of hardcoding a specific version and signature in this script, it +# would be possible to use the "latest" symlink URL but then we need to +# download both the codecov.SHA256SUM files each time and check the signatures +# with the codecov gpg key as well, see: +# https://docs.codecov.com/docs/codecov-uploader#integrity-checking-the-uploader +# However this approach would yield a larger number of downloads from +# codecov.io and keybase.io, therefore increasing the risk of running into +# network failures. +CODECOV_UPLOADER_VERSION=0.7.1 +CODECOV_BASE_URL="https://uploader.codecov.io/v$CODECOV_UPLOADER_VERSION" + + +# Check that the git repo is located at the expected location: +if [[ ! -d "$BUILD_REPOSITORY_LOCALPATH/.git" ]]; then + echo "Could not find the git checkout at $BUILD_REPOSITORY_LOCALPATH" + exit 1 +fi + +# Check that the combined coverage file exists at the expected location: +export COVERAGE_XML="$BUILD_REPOSITORY_LOCALPATH/coverage.xml" +if [[ ! 
-f "$COVERAGE_XML" ]]; then + echo "Could not find the combined coverage file at $COVERAGE_XML" + exit 1 +fi + +if [[ $OSTYPE == *"linux"* ]]; then + curl -Os "$CODECOV_BASE_URL/linux/codecov" + SHA256SUM="b9282b8b43eef83f722646d8992c4dd36563046afe0806722184e7e9923a6d7b codecov" + echo "$SHA256SUM" | shasum -a256 -c + chmod +x codecov + ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose +elif [[ $OSTYPE == *"darwin"* ]]; then + curl -Os "$CODECOV_BASE_URL/macos/codecov" + SHA256SUM="e4ce34c144d3195eccb7f8b9ca8de092d2a4be114d927ca942500f3a6326225c codecov" + echo "$SHA256SUM" | shasum -a256 -c + chmod +x codecov + ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose +else + curl -Os "$CODECOV_BASE_URL/windows/codecov.exe" + SHA256SUM="f5de88026f061ff08b88a5895f9c11855523924ceb8174e027403dd20fa5e4d6 codecov.exe" + echo "$SHA256SUM" | sha256sum -c + ./codecov.exe -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose +fi diff --git a/build_tools/azure/windows.yml b/build_tools/azure/windows.yml index ea97b7eb5eaf0..1727da4138f07 100644 --- a/build_tools/azure/windows.yml +++ b/build_tools/azure/windows.yml @@ -19,7 +19,6 @@ jobs: PYTEST_XDIST_VERSION: 'latest' TEST_DIR: '$(Agent.WorkFolder)/tmp_folder' SHOW_SHORT_SUMMARY: 'false' - CPU_COUNT: '2' strategy: matrix: ${{ insert }}: ${{ parameters.matrix }} @@ -37,17 +36,15 @@ jobs: addToPath: true architecture: 'x86' displayName: Use 32 bit System Python - condition: eq(variables['PYTHON_ARCH'], '32') - - bash: ./build_tools/azure/install_win.sh + condition: and(succeeded(), eq(variables['PYTHON_ARCH'], '32')) + - bash: ./build_tools/azure/install.sh displayName: 'Install' - bash: ./build_tools/azure/test_script.sh displayName: 'Test Library' - - bash: ./build_tools/azure/upload_codecov.sh + - bash: ./build_tools/azure/combine_coverage_reports.sh condition: and(succeeded(), eq(variables['COVERAGE'], 'true'), eq(variables['SELECTED_TESTS'], '')) - displayName: 'Upload To Codecov' - env: - CODECOV_TOKEN: $(CODECOV_TOKEN) + displayName: 'Combine coverage' - task: PublishTestResults@2 inputs: testResultsFiles: '$(TEST_DIR)/$(JUNITXML)' @@ -78,3 +75,11 @@ jobs: JUNIT_FILE: $(TEST_DIR)/$(JUNITXML) condition: and(succeededOrFailed(), eq(variables['CREATE_ISSUE_ON_TRACKER'], 'true'), eq(variables['Build.Reason'], 'Schedule')) + - bash: ./build_tools/azure/upload_codecov.sh + condition: and(succeeded(), + eq(variables['COVERAGE'], 'true'), + eq(variables['SELECTED_TESTS'], '')) + displayName: 'Upload To Codecov' + retryCountOnTaskFailure: 5 + env: + CODECOV_TOKEN: $(CODECOV_TOKEN) diff --git a/build_tools/github/build_doc.sh b/build_tools/circle/build_doc.sh similarity index 96% rename from build_tools/github/build_doc.sh rename to build_tools/circle/build_doc.sh index 249dd82e798b6..35fee3ae50b65 100755 --- a/build_tools/github/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -16,9 +16,12 @@ set -e # If the inspection of the current commit fails for any reason, the default # behavior is to quick build the documentation. 
+# defines the get_dep and show_installed_libraries functions +source build_tools/shared.sh + if [ -n "$GITHUB_ACTION" ] then - # Map the variables for the new documentation builder to the old one + # Map the variables from Github Action to CircleCI CIRCLE_SHA1=$(git log -1 --pretty=format:%H) CIRCLE_JOB=$GITHUB_JOB @@ -145,8 +148,6 @@ else make_args=html fi -make_args="SPHINXOPTS=-T $make_args" # show full traceback on exception - # Installing required system packages to support the rendering of math # notation in the HTML documentation and to optimize the image files sudo -E apt-get -yq update --allow-releaseinfo-change @@ -169,11 +170,12 @@ ccache -M 512M export CCACHE_COMPRESS=1 # pin conda-lock to latest released version (needs manual update from time to time) -mamba install conda-lock==1.0.5 -y -conda-lock install --log-level WARNING --name $CONDA_ENV_NAME $LOCK_FILE +mamba install "$(get_dep conda-lock min)" -y + +conda-lock install --log-level DEBUG --name $CONDA_ENV_NAME $LOCK_FILE source activate $CONDA_ENV_NAME -mamba list +show_installed_libraries # Set parallelism to 3 to overlap IO bound tasks with CPU bound tasks on CI # workers with 2 cores when building the compiled extensions of scikit-learn. @@ -191,6 +193,7 @@ then python build_tools/circle/list_versions.py > doc/versions.rst fi + # The pipefail is requested to propagate exit code set -o pipefail && cd doc && make $make_args 2>&1 | tee ~/log.txt diff --git a/build_tools/github/doc_environment.yml b/build_tools/circle/doc_environment.yml similarity index 74% rename from build_tools/github/doc_environment.yml rename to build_tools/circle/doc_environment.yml index 848282abc18fe..4df22341635a3 100644 --- a/build_tools/github/doc_environment.yml +++ b/build_tools/circle/doc_environment.yml @@ -14,19 +14,26 @@ dependencies: - matplotlib - pandas - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow + - pip + - ninja + - meson-python - scikit-image - seaborn - memory_profiler - compilers - sphinx - sphinx-gallery + - sphinx-copybutton - numpydoc - sphinx-prompt - plotly + - polars - pooch + - sphinxext-opengraph - pip - pip: - - sphinxext-opengraph + - jupyterlite-sphinx + - jupyterlite-pyodide-kernel diff --git a/build_tools/circle/doc_linux-64_conda.lock b/build_tools/circle/doc_linux-64_conda.lock new file mode 100644 index 0000000000000..34ec64ad5863b --- /dev/null +++ b/build_tools/circle/doc_linux-64_conda.lock @@ -0,0 +1,322 @@ +# Generated by conda-lock. 
+# platform: linux-64 +# input_hash: b57888763997b08b2f240b5ff1ed6afcf88685f3d8c791ea8eba4d80483c43d0 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1 +https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h0223996_107.conda#851e9651c9e4cd5dc19f80398eba9a1c +https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h0223996_107.conda#167a1f5d77d8f3c2a638f7eb418429f1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h77fa898_7.conda#abf3fec87c2563697defa759dec3d639 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf +https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-ha885e6a_0.conda#800a4c872b5bc06fa83888d112fe6c4f +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-h4852527_0.conda#a05c7712be80622934f7011e0a1d43fc +https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.40-hdade7a5_3.conda#2d9a60578bc28469d9aeef9aea5520c3 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 +https://conda.anaconda.org/conda-forge/linux-64/aom-3.9.0-hac33072_0.conda#93a3bf248e5bc729807db198a9c89f07 +https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/charls-2.4.2-h59595ed_0.conda#4336bd67920dd504cd8c6761d6a99645 +https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.2.1-hd590300_0.conda#418c6ca5929a611cbd69204907a83995 +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 
+https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda#3bf7b9fd5a7136126e0234db4b87c8b6 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-hd590300_3.conda#5aeabe88534ea4169d4c49998f293d6c +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f +https://conda.anaconda.org/conda-forge/linux-64/libaec-1.1.3-h59595ed_0.conda#5e97e271911b8b2001a8b71860c32faa +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libhwy-1.1.0-h00ab1b0_0.conda#88928158ccfe797eac29ef5e03f7d23d +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 +https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f +https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-12.3.0-hb8811af_7.conda#ee573415c47ce17f65101d0b3fba396d +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2 +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 
+https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-hd590300_0.conda#c0f3abb4a16477208bbd43a39bd56f18 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/rav1e-0.6.6-he8a937b_2.conda#77d9955b4abddb811cb8ab1aa7d743e4 +https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.0-hdb0a2a9_1.conda#843bbb8ace1d64ac50d64639ff38b014 +https://conda.anaconda.org/conda-forge/linux-64/svt-av1-2.0.0-h59595ed_0.conda#207e01ffa0eb2d2efb83fb6f46365a21 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 +https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.1-h59595ed_0.conda#fd486bffbf0d6841cf1456a8f2e3a995 +https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.0.7-h0b41bf4_0.conda#49e8329110001f04923fe7e864990b0c +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.3.0-h58ffeeb_7.conda#95f78565a09852783d3e90e0389cfa5f +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libavif16-1.0.4-hfa3d5b6_3.conda#3518d00de414c39b46d87dcc1ff65661 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae 
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_0.conda#5d801a4906adc712d480afc362623b59 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.5-hc2324a3_1.conda#11d76bee958b1989bd1ac6ee7372ea6d +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba +https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.14.4-hb4ffafa_1.conda#84eb54e92644c328e087e1c725773317 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gcc-12.3.0-h915e2ae_7.conda#84b1c5cebd0a0443f3d7f90a4be93fc6 +https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.3.0-h6477408_3.conda#7a53f84c45bdf4656ba27b9e9ed68b3d +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-h1645026_7.conda#2d9d4058c433c9ce2a811c76658c4efd +https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-h2a574ab_7.conda#265caa78b979f112fc241cecd0015c91 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libjxl-0.10.2-hcae5a98_0.conda#901db891e1e21afd8524cd636a8c8e3b +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.5-hb77312f_0.conda#efd221d3668077ca067a206269418dec +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_h413a1c8_0.conda#a356024784da6dfd4683dc5ecf45b155 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.5-ha31de31_0.conda#b923cdb6e567ada84f991ffcc5848afb +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad 
+https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a +https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.7.0-hd590300_1.conda#e9dffe1056994133616378309f932d77 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d +https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h915e2ae_7.conda#8efa768f7f74085629f3e1090e7f0569 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h617cb40_3.conda#3a9e5b8a6f651ff14e74d896d8f04ab6 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 +https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h915e2ae_7.conda#721c5433122a02bf3a081db10a2e68e2 +https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-h4a1b8e8_3.conda#9ec22c7c544f4a4f6d660f0a3b0fd15c +https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca +https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_openblas.conda#1a2a0cd3153464fee6646f3dd6dad9b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.5-default_h5d6823c_0.conda#60c39a00b694c98da03f67a3ba1d7499 +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f +https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/noarch/networkx-3.2.1-pyhd8ed1ab_0.conda#425fce3b531bed6ec3c74fab3e5f0a1c +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h7a3da1a_0.conda#4b422ebe8fc6a5320d0c1c22e5a46032 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.2.2-pyhd8ed1ab_0.conda#6f6cf28bf8e021933869bae3f84b8fc9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.8-py39hd1e30aa_0.conda#ec86403fde8793ac1c36f8afa3d15902 +https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda#b7f5c092b8f9800150d998a71b76d5a1 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 +https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.3.0-pyhd8ed1ab_0.conda#216cfa8e32bcd1447646768351df6059 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.11.0-pyha770c72_0.conda#6ef2fc37559256cf682d8b3375e89b80 +https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e +https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_1.conda#28de2e073db9ca9b72858bee9fb6f571 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.51.0-py39hd1e30aa_0.conda#79f5dd8778873faa54e8f7b2729fe8a6 +https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_1.conda#cf4b0e7c4c78bb0662aed9b27c414a3c +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_openblas.conda#4b31699e0ec5de64d5896e580389c9a1 +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_openblas.conda#b083767b6c877e24ee597d93b87ab838 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 +https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 
+https://conda.anaconda.org/conda-forge/noarch/plotly-5.22.0-pyhd8ed1ab_0.conda#5b409a5f738e7d76c2b426eddb7e9956 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 +https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_1.conda#d8d07866ac3b5b6937213c89a1874f08 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.3-haf2f30d_0.conda#f3df87cc9ef0b5113bff55aefcbcafd5 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/noarch/lazy_loader-0.4-pyhd8ed1ab_0.conda#a284ff318fbdb0dd83928275b4b6087c +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_openblas.conda#1fd156abd41a4992835952f6f4d951d0 +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py39h474f0d3_0.conda#aa265f5697237aa13cc10f53fa8acc4f +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_openblas.conda#63ddb593595c9cf5eb08d3de54d66df8 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39h7633fee_0.conda#bdc188e59857d6efab332714e0d01d93 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.3-h9ad1361_0.conda#8fb0e954c616bb0f9389efac4b4ed44b +https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2024.1.1-py39ha98d97a_6.conda#9ada409e8a8202f848abfed8e4e3f6be +https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.1-pyh4b66e23_0.conda#bcf6a6f4c6889ca083e8d33afbafb8d5 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py39hddac248_0.conda#259c4e76e6bda8888aefc098ae1ba749 +https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687 +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.26-py39ha963410_0.conda#d138679a254e4e0918cfc1114c928bb8 +https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.1-pyhd8ed1ab_0.conda#d15917f33140f8d2ac9ca44db7ec8a25 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b +https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h44dd56e_1.conda#d037c20e3da2e85f03ebd20ad480c359 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.0-py39haf93ffa_1.conda#57ce54e228e3fbc60e42fa368eff3251 
+https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-openblas.conda#5065468105542a8b23ea47bd8b6fa55f +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.4-py39he9076e7_0.conda#1919384a8420e7bb25f6c3a582e0857c +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.1.0-py39hda80f44_0.conda#f225666c47726329201b604060f1436c +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.1-py39h44dd56e_0.conda#dc565186b972bd87e49b9c35390ddd8c +https://conda.anaconda.org/conda-forge/noarch/tifffile-2024.5.10-pyhd8ed1ab_0.conda#125438a8b679e4c08ee8f244177216c9 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 +https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.22.0-py39hddac248_2.conda#8d502a4d2cbe5a45ff35ca8af8cbec0a +https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.13.2-pyhd8ed1ab_2.conda#b713b116feaf98acdba93ad4d7f90ca1 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.4-py39hf3d152e_0.conda#c66d2da2669fddc657b679bccab95775 +https://conda.anaconda.org/conda-forge/noarch/seaborn-0.13.2-hd8ed1ab_2.conda#a79d8797f62715255308d92d3a91ef2e +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.7.0-pyhd8ed1ab_0.conda#1ad3afced398492586ca1bef70328be4 +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 +https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.16.0-pyhd8ed1ab_0.conda#add28691ee89e875b190eda07929d5d4 +https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1 +https://conda.anaconda.org/conda-forge/noarch/sphinx-7.3.7-pyhd8ed1ab_0.conda#7b1465205e28d75d2c0e1a868ee00a67 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e +https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1ab_0.conda#286283e05a1eff606f55e7cd70f6d7f7 +# pip attrs @ https://files.pythonhosted.org/packages/e0/44/827b2a91a5816512fcaf3cc4ebc465ccd5d598c45cefa6703fcf4a79018f/attrs-23.2.0-py3-none-any.whl#sha256=99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 +# pip cloudpickle @ https://files.pythonhosted.org/packages/96/43/dae06432d0c4b1dc9e9149ad37b4ca8384cf6eb7700cd9215b177b914f0a/cloudpickle-3.0.0-py3-none-any.whl#sha256=246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7 +# pip defusedxml @ https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl#sha256=a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61 +# pip fastjsonschema @ 
https://files.pythonhosted.org/packages/9c/b9/79691036d4a8f9857e74d1728b23f34f583b81350a27492edda58d5604e1/fastjsonschema-2.19.1-py3-none-any.whl#sha256=3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0 +# pip fqdn @ https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl#sha256=3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014 +# pip json5 @ https://files.pythonhosted.org/packages/8a/3c/4f8791ee53ab9eeb0b022205aa79387119a74cc9429582ce04098e6fc540/json5-0.9.25-py3-none-any.whl#sha256=34ed7d834b1341a86987ed52f3f76cd8ee184394906b6e22a1e0deb9ab294e8f +# pip jsonpointer @ https://files.pythonhosted.org/packages/12/f6/0232cc0c617e195f06f810534d00b74d2f348fe71b2118009ad8ad31f878/jsonpointer-2.4-py2.py3-none-any.whl#sha256=15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a +# pip jupyterlab-pygments @ https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl#sha256=841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780 +# pip mistune @ https://files.pythonhosted.org/packages/f0/74/c95adcdf032956d9ef6c89a9b8a5152bf73915f8c633f3e3d88d06bd699c/mistune-3.0.2-py3-none-any.whl#sha256=71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205 +# pip overrides @ https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl#sha256=c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49 +# pip pandocfilters @ https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl#sha256=93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc +# pip pkginfo @ https://files.pythonhosted.org/packages/56/09/054aea9b7534a15ad38a363a2bd974c20646ab1582a387a95b8df1bfea1c/pkginfo-1.10.0-py3-none-any.whl#sha256=889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 +# pip prometheus-client @ https://files.pythonhosted.org/packages/c7/98/745b810d822103adca2df8decd4c0bbe839ba7ad3511af3f0d09692fc0f0/prometheus_client-0.20.0-py3-none-any.whl#sha256=cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7 +# pip ptyprocess @ https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 +# pip pycparser @ https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl#sha256=c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc +# pip python-json-logger @ https://files.pythonhosted.org/packages/35/a6/145655273568ee78a581e734cf35beb9e33a370b29c5d3c8fee3744de29f/python_json_logger-2.0.7-py3-none-any.whl#sha256=f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd +# pip pyyaml @ https://files.pythonhosted.org/packages/7d/39/472f2554a0f1e825bd7c5afc11c817cd7a2f3657460f7159f691fbb37c51/PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c +# pip rfc3986-validator @ 
https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl#sha256=2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9 +# pip rpds-py @ https://files.pythonhosted.org/packages/97/b1/12238bd8cdf3cef71e85188af133399bfde1bddf319007361cc869d6f6a7/rpds_py-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e4c39ad2f512b4041343ea3c7894339e4ca7839ac38ca83d68a832fc8b3748ab +# pip send2trash @ https://files.pythonhosted.org/packages/40/b0/4562db6223154aa4e22f939003cb92514c79f3d4dccca3444253fd17f902/Send2Trash-1.8.3-py3-none-any.whl#sha256=0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9 +# pip sniffio @ https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl#sha256=2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 +# pip soupsieve @ https://files.pythonhosted.org/packages/4c/f3/038b302fdfbe3be7da016777069f26ceefe11a681055ea1f7817546508e3/soupsieve-2.5-py3-none-any.whl#sha256=eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7 +# pip traitlets @ https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl#sha256=b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f +# pip types-python-dateutil @ https://files.pythonhosted.org/packages/c7/1b/af4f4c4f3f7339a4b7eb3c0ab13416db98f8ac09de3399129ee5fdfa282b/types_python_dateutil-2.9.0.20240316-py3-none-any.whl#sha256=6b8cb66d960771ce5ff974e9dd45e38facb81718cc1e208b10b1baccbfdbee3b +# pip uri-template @ https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl#sha256=a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363 +# pip webcolors @ https://files.pythonhosted.org/packages/d5/e1/3e9013159b4cbb71df9bd7611cbf90dc2c621c8aeeb677fc41dad72f2261/webcolors-1.13-py3-none-any.whl#sha256=29bc7e8752c0a1bd4a1f03c14d6e6a72e93d82193738fa860cbff59d0fcc11bf +# pip webencodings @ https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl#sha256=a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 +# pip websocket-client @ https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl#sha256=17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526 +# pip anyio @ https://files.pythonhosted.org/packages/14/fd/2f20c40b45e4fb4324834aea24bd4afdf1143390242c0b33774da0e2e34f/anyio-4.3.0-py3-none-any.whl#sha256=048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8 +# pip arrow @ https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl#sha256=c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80 +# pip beautifulsoup4 @ https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl#sha256=b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed +# pip bleach @ https://files.pythonhosted.org/packages/ea/63/da7237f805089ecc28a3f36bca6a21c31fcbc2eb380f3b8f1be3312abd14/bleach-6.1.0-py3-none-any.whl#sha256=3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6 +# pip cffi 
@ https://files.pythonhosted.org/packages/ea/ac/e9e77bc385729035143e54cc8c4785bd480eaca9df17565963556b0b7a93/cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 +# pip doit @ https://files.pythonhosted.org/packages/44/83/a2960d2c975836daa629a73995134fd86520c101412578c57da3d2aa71ee/doit-0.36.0-py3-none-any.whl#sha256=ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a +# pip jupyter-core @ https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl#sha256=4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409 +# pip referencing @ https://files.pythonhosted.org/packages/b7/59/2056f61236782a2c86b33906c025d4f4a0b17be0161b63b70fd9e8775d36/referencing-0.35.1-py3-none-any.whl#sha256=eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de +# pip rfc3339-validator @ https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl#sha256=24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa +# pip terminado @ https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl#sha256=a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0 +# pip tinycss2 @ https://files.pythonhosted.org/packages/2c/4d/0db5b8a613d2a59bbc29bc5bb44a2f8070eb9ceab11c50d477502a8a0092/tinycss2-1.3.0-py3-none-any.whl#sha256=54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7 +# pip argon2-cffi-bindings @ https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae +# pip isoduration @ https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl#sha256=b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042 +# pip jsonschema-specifications @ https://files.pythonhosted.org/packages/ee/07/44bd408781594c4d0a027666ef27fab1e441b109dc3b76b4f836f8fd04fe/jsonschema_specifications-2023.12.1-py3-none-any.whl#sha256=87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c +# pip jupyter-server-terminals @ https://files.pythonhosted.org/packages/07/2d/2b32cdbe8d2a602f697a649798554e4f072115438e92249624e532e8aca6/jupyter_server_terminals-0.5.3-py3-none-any.whl#sha256=41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa +# pip jupyterlite-core @ https://files.pythonhosted.org/packages/05/d2/1d59d9a70d684b1eb3eb3a0b80a36b4e1d691e94af5d53aee56b1ad5240b/jupyterlite_core-0.3.0-py3-none-any.whl#sha256=247cc34ae6fedda41b15ce4778997164508b2039bc92480665cadfe955193467 +# pip pyzmq @ https://files.pythonhosted.org/packages/64/b8/1c181c13e118cabccfd25bd3e169e44958c649180b0d78b798a66899e08b/pyzmq-26.0.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=b3cd31f859b662ac5d7f4226ec7d8bd60384fa037fc02aee6ff0b53ba29a3ba8 +# pip argon2-cffi @ https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl#sha256=c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea +# pip jsonschema @ 
https://files.pythonhosted.org/packages/c8/2f/324fab4be6fe37fb7b521546e8a557e6cf08c1c1b3d0b4839a00f589d9ef/jsonschema-4.22.0-py3-none-any.whl#sha256=ff4cfd6b1367a40e7bc6411caec72effadd3db0bbe5017de188f2d6108335802 +# pip jupyter-client @ https://files.pythonhosted.org/packages/75/6d/d7b55b9c1ac802ab066b3e5015e90faab1fffbbd67a2af498ffc6cc81c97/jupyter_client-8.6.1-py3-none-any.whl#sha256=3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f +# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/83/bf/749279904094015d5cb7e030dd7a111f8b013b9f1809d954d04ebe0c1197/jupyterlite_pyodide_kernel-0.3.1-py3-none-any.whl#sha256=ac9d9dd95adcced57d465a7b298f220d8785845c017ad3abf2a3677ff02631c6 +# pip jupyter-events @ https://files.pythonhosted.org/packages/a5/94/059180ea70a9a326e1815176b2370da56376da347a796f8c4f0b830208ef/jupyter_events-0.10.0-py3-none-any.whl#sha256=4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960 +# pip nbformat @ https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl#sha256=3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b +# pip nbclient @ https://files.pythonhosted.org/packages/66/e8/00517a23d3eeaed0513e718fbc94aab26eaa1758f5690fc8578839791c79/nbclient-0.10.0-py3-none-any.whl#sha256=f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f +# pip nbconvert @ https://files.pythonhosted.org/packages/b8/bb/bb5b6a515d1584aa2fd89965b11db6632e4bdc69495a52374bcc36e56cfa/nbconvert-7.16.4-py3-none-any.whl#sha256=05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3 +# pip jupyter-server @ https://files.pythonhosted.org/packages/07/46/6bb926b3bf878bf687b952fb6a4c09d014b4575a25960f2cd1a61793763f/jupyter_server-2.14.0-py3-none-any.whl#sha256=fb6be52c713e80e004fac34b35a0990d6d36ba06fd0a2b2ed82b899143a64210 +# pip jupyterlab-server @ https://files.pythonhosted.org/packages/2f/b9/ed4ecad7cf1863a64920dc4c19b0376628b5d6bd28d2ec1e00cbac4ba2fb/jupyterlab_server-2.27.1-py3-none-any.whl#sha256=f5e26156e5258b24d532c84e7c74cc212e203bff93eb856f81c24c16daeecc75 +# pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/7c/c7/5c0f4dc5408122881a32b1809529d1d7adcc60cb176c7b50725910c328cc/jupyterlite_sphinx-0.14.0-py3-none-any.whl#sha256=144edf37e8a77f49b249dd57e3a22ce19ff87805ed79b460e831dc90bf38c269 diff --git a/build_tools/github/doc_min_dependencies_environment.yml b/build_tools/circle/doc_min_dependencies_environment.yml similarity index 54% rename from build_tools/github/doc_min_dependencies_environment.yml rename to build_tools/circle/doc_min_dependencies_environment.yml index 7b0ba5983304d..14f4485295455 100644 --- a/build_tools/github/doc_min_dependencies_environment.yml +++ b/build_tools/circle/doc_min_dependencies_environment.yml @@ -4,28 +4,33 @@ channels: - conda-forge dependencies: - - python=3.8 - - numpy=1.17.3 # min + - python=3.9 + - numpy=1.19.5 # min - blas - - scipy=1.3.2 # min - - cython=0.29.24 # min + - scipy=1.6.0 # min + - cython=3.0.10 # min - joblib - threadpoolctl - - matplotlib=3.1.3 # min - - pandas=1.0.5 # min + - matplotlib=3.3.4 # min + - pandas=1.1.5 # min - pyamg - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow - - scikit-image=0.16.2 # min + - pip + - ninja + - meson-python + - scikit-image=0.17.2 # min - seaborn - memory_profiler - compilers - - sphinx=4.0.1 # min - - sphinx-gallery=0.7.0 # min + - sphinx=6.0.0 # min + - sphinx-gallery=0.15.0 # min + - sphinx-copybutton=0.5.2 # 
min - numpydoc=1.2.0 # min - sphinx-prompt=1.3.0 # min - - plotly=5.10.0 # min + - plotly=5.14.0 # min + - polars=0.20.23 # min - pooch - pip - pip: diff --git a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock new file mode 100644 index 0000000000000..043587152c63b --- /dev/null +++ b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock @@ -0,0 +1,248 @@ +# Generated by conda-lock. +# platform: linux-64 +# input_hash: 08b61aae27c59a8d35d008fa2f947440f3cbcbc41622112e33e68f90d69b621c +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.2.2-hbcca054_0.conda#2f4327a1cbe7f022401b236e915a5fef +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h55db66e_0.conda#10569984e7db886e4f1abc2b47ad79a1 +https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h0223996_107.conda#851e9651c9e4cd5dc19f80398eba9a1c +https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h0223996_107.conda#167a1f5d77d8f3c2a638f7eb418429f1 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-hc0a3c3a_7.conda#53ebd4c833fa01cb2c6353e99f905406 +https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2024.1.0-ha957f24_692.conda#b35af3f0f25498f4e9fc4c471910346c +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4b3259a8ac6cdf0037752904da6a7 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h77fa898_7.conda#abf3fec87c2563697defa759dec3d639 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf +https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-ha885e6a_0.conda#800a4c872b5bc06fa83888d112fe6c4f +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-h4852527_0.conda#a05c7712be80622934f7011e0a1d43fc +https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.40-hdade7a5_3.conda#2d9a60578bc28469d9aeef9aea5520c3 +https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.11-hd590300_1.conda#0bb492cca54017ea314b809b1ee3a176 
+https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 +https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 +https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-h59595ed_2.conda#985f2f453fb72408d6b6f1be0f324033 +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff +https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 +https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-h661eb56_2.conda#dd197c968bf9760bba0031888d431ede +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d +https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-h59595ed_2.conda#172bcc51059416e7ce99e7b528cede83 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-hca663fb_7.conda#c0bd771f09a326fdcd95a60b617795bf +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 +https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 +https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f +https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-12.3.0-hb8811af_7.conda#ee573415c47ce17f65101d0b3fba396d +https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad +https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 +https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d +https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69 +https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72 +https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.0-hd590300_0.conda#c0f3abb4a16477208bbd43a39bd56f18 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 
+https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 +https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 +https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae +https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61 +https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.3.0-h58ffeeb_7.conda#95f78565a09852783d3e90e0389cfa5f +https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-h661eb56_2.conda#02e41ab5834dcdcc8590cf29d9526f50 +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 +https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 +https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d +https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-h59595ed_2.conda#b63d9b6da3653179a278077f0de20014 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_7.conda#1b84f26d9f4f6026e179e7805d5a15cd +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.3-h2797004_0.conda#b3316cbe90249da4f8e84cd66e1cc55b +https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-hc051c1a_0.conda#5d801a4906adc712d480afc362623b59 +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-hf1915f5_4.conda#784a4df6676c581ca624fbe460703a6d +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.43-hcad00b1_0.conda#8292dea9e022d9610a11fce5e0896ed8 +https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gcc-12.3.0-h915e2ae_7.conda#84b1c5cebd0a0443f3d7f90a4be93fc6 +https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.3.0-h6477408_3.conda#7a53f84c45bdf4656ba27b9e9ed68b3d +https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-h59595ed_2.conda#219ba82e95d7614cf7140d2a4afc0926 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-h1645026_7.conda#2d9d4058c433c9ce2a811c76658c4efd +https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-h2a574ab_7.conda#265caa78b979f112fc241cecd0015c91 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.2-hf974151_0.conda#72724f6a78ecb15559396966226d5838 +https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.10.0-default_h2fb2949_1000.conda#7e3726e647a619c6ce5939014dfde86d +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef +https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.5-hb77312f_0.conda#efd221d3668077ca067a206269418dec +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h1dd3fc0_3.conda#66f03896ffbe1a110ffda05c7a856504 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.5-ha31de31_0.conda#b923cdb6e567ada84f991ffcc5848afb +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-hca2cd23_4.conda#1b50eebe2a738a3146c154d2eceaa8b6 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.100-hca3bf56_0.conda#949c4a82290ee58b3c970cef4bcfd4ad +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-h8ee46fc_0.conda#077b6e8ad6a3ddb741fce2496dd01bec +https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a +https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.7.0-hd590300_1.conda#e9dffe1056994133616378309f932d77 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a +https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda#f3ad426304898027fc619827ff428eca +https://conda.anaconda.org/conda-forge/noarch/cloudpickle-3.0.0-pyhd8ed1ab_0.conda#753d29fe41bb881e4b9c004f0abf973f +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e +https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d +https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py39hf3d152e_1.tar.bz2#adb733ec2ee669f6d010758d054da60f +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d +https://conda.anaconda.org/conda-forge/noarch/fsspec-2024.3.1-pyhca7485f_0.conda#b7f0662ef2c9d4404f0af9eef5ed2fde +https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h915e2ae_7.conda#8efa768f7f74085629f3e1090e7f0569 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h617cb40_3.conda#3a9e5b8a6f651ff14e74d896d8f04ab6 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.2-hb6ce0ca_0.conda#a965aeaf060289528a3fbe09326edae2 +https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h915e2ae_7.conda#721c5433122a02bf3a081db10a2e68e2 +https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-h4a1b8e8_3.conda#9ec22c7c544f4a4f6d660f0a3b0fd15c +https://conda.anaconda.org/conda-forge/noarch/idna-3.7-pyhd8ed1ab_0.conda#c0cc1420498b17414d8617d0b9f506ca +https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.5-default_h5d6823c_0.conda#60c39a00b694c98da03f67a3ba1d7499 +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.49-h4f305b6_0.conda#dfcfd72c7a430d3616763ecfbefe4ca9 +https://conda.anaconda.org/conda-forge/linux-64/libpq-16.3-ha72fbe1_0.conda#bac737ae28b79cfbafd515258d97d29e +https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f +https://conda.anaconda.org/conda-forge/noarch/networkx-3.2-pyhd8ed1ab_0.conda#cec8cc498664cc00a070676aa89e69a7 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/platformdirs-4.2.2-pyhd8ed1ab_0.conda#6f6cf28bf8e021933869bae3f84b8fc9 
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91 +https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.8-py39hd1e30aa_0.conda#ec86403fde8793ac1c36f8afa3d15902 +https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda#b7f5c092b8f9800150d998a71b76d5a1 +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 +https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad +https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.1-py39hd1e30aa_1.conda#37218233bcdc310e4fde6453bc1b40d8 +https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py39hf3d152e_1.tar.bz2#4252d0c211566a9f65149ba7f6e87aa4 +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.12.0-h00ab1b0_0.conda#f1b776cff1b426e7e7461a8502a3b731 +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.3.0-pyhd8ed1ab_0.conda#216cfa8e32bcd1447646768351df6059 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 +https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.1-pyhd8ed1ab_0.conda#2fcb582444635e2c402e8569bb94e039 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.11.0-pyha770c72_0.conda#6ef2fc37559256cf682d8b3375e89b80 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e +https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_1.conda#28de2e073db9ca9b72858bee9fb6f571 +https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.3-py39hd1e30aa_0.conda#dc0fb8e157c7caba4c98f1e1f9d2e5f4 
+https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_1.conda#cf4b0e7c4c78bb0662aed9b27c414a3c +https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.2-hf974151_0.conda#d427988dc3dbd0a4c136f52db356cc6a +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.1.0-pyha770c72_0.conda#0896606848b2dc5cebdf111b6543aa04 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h662e7e4_0.conda#b32c0da42b1f24a98577bb3d7fc0b995 +https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-64/mkl-2024.1.0-ha957f24_692.conda#e7f5c5cda17c6f5047db27d44367c19d +https://conda.anaconda.org/conda-forge/noarch/partd-1.4.2-pyhd8ed1ab_0.conda#0badf9c54e24cecfb0ad2f99d680c163 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.3.0-py39h90c7501_0.conda#1e3b6af9592be71ce19f0a6aae05d97b +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.0-pyhd8ed1ab_0.conda#6a7bcc42ef58dd6cf3da9333ea102433 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 +https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_1.conda#d8d07866ac3b5b6937213c89a1874f08 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.3-haf2f30d_0.conda#f3df87cc9ef0b5113bff55aefcbcafd5 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.5.0-hfac3d4d_0.conda#f5126317dd0ce0ba26945e411ecc6960 +https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-7.1.0-hd8ed1ab_0.conda#6ef2b72d291b39e479d7694efa2b2b98 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-22_linux64_mkl.conda#eb6deb4ba6f92ea3f31c09cb8b764738 +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2024.1.0-ha770c72_692.conda#56142862a71bcfdd6ef2ce95c8e90755 +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b 
+https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b +https://conda.anaconda.org/conda-forge/noarch/dask-core-2024.5.0-pyhd8ed1ab_0.conda#8472f598970b9af96ca8106fa243ab67 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.3-h9ad1361_0.conda#8fb0e954c616bb0f9389efac4b4ed44b +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-22_linux64_mkl.conda#d6f942423116553f068b2f2d93ffea2e +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-22_linux64_mkl.conda#4edf2e7ce63920e4f539d12e32fb478e +https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.1-pyhd8ed1ab_0.conda#d15917f33140f8d2ac9ca44db7ec8a25 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-22_linux64_mkl.conda#aa0a5a70e1c957d5911e76ac98e471e1 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.19.5-py39hd249d9e_3.tar.bz2#0cf333996ebdeeba8d1c8c1c0ee9eff9 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc9dc06e_21.conda#b325046180590c868ce0dbf267b82eb8 +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-22_linux64_mkl.conda#3cb0e51433c88d2f4cdfb50c5c08a683 +https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-lite-2019.12.3-py39hd257fcd_5.tar.bz2#32dba66d6abc2b4b5b019c9e54307312 +https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.1-pyh4b66e23_0.conda#bcf6a6f4c6889ca083e8d33afbafb8d5 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.3.4-py39h2fa2bec_0.tar.bz2#9ec0b2186fab9121c54f4844f93ee5b7 +https://conda.anaconda.org/conda-forge/linux-64/pandas-1.1.5-py39hde0f152_0.tar.bz2#79fc4b5b3a865b90dd3701cecf1ad33c +https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687 +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.23-py39ha963410_0.conda#4871f09d653e979d598d2d4cd5fa868d +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 +https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.3.0-py39hd257fcd_1.tar.bz2#c4b698994b2d8d2e659ae02202e6abe4 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.6.0-py39hee8e79c_0.tar.bz2#3afcb78281836e61351a2924f3230060 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.122-mkl.conda#ead856637ff8a7feba572e2cf23b453b +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.3.4-py39hf3d152e_0.tar.bz2#cbaec993375a908bbe506dc7328d747c +https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39hac2352c_1.tar.bz2#6fb0628d6195d8b6caa2422d09296399 +https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42 +https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.13.2-py39hd257fcd_0.tar.bz2#bd7cdadf70e34a19333c3aacc40206e8 +https://conda.anaconda.org/conda-forge/noarch/tifffile-2020.6.3-py_0.tar.bz2#1fb771bb25b2eecbc73abf5143fa35bd +https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.17.2-py39hde0f152_4.tar.bz2#2a58a7e382317b03f023b2fddf40f8a1 +https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.2-hd8ed1ab_0.conda#50847a47c07812f88581081c620f5160 +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 
+https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.15.0-pyhd8ed1ab_0.conda#1a49ca9515ef9a96edff2eea06143dc6 +https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.5-pyhd8ed1ab_0.conda#7e1e7437273682ada2ed5e9e9714b140 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda#26acae54b06f178681bfb551760f5dd1 +https://conda.anaconda.org/conda-forge/noarch/sphinx-6.0.0-pyhd8ed1ab_2.conda#ac1d3b55da1669ee3a56973054fd7efb +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e +# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357 diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index dfcc600957469..345e08b4bece4 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -4,9 +4,9 @@ import json import re import sys +from urllib.request import urlopen from sklearn.utils.fixes import parse_version -from urllib.request import urlopen def json_urlread(url): diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh index 5b94211e4e30e..f959b8b65c85c 100755 --- a/build_tools/circle/push_doc.sh +++ b/build_tools/circle/push_doc.sh @@ -1,8 +1,8 @@ #!/bin/bash # This script is meant to be called in the "deploy" step defined in -# circle.yml. See https://circleci.com/docs/ for more details. +# .circleci/config.yml. See https://circleci.com/docs/ for more details. # The behavior of the script is controlled by environment variable defined -# in the circle.yml in the top level folder of the project. +# in the .circleci/config.yml file. set -ex @@ -56,7 +56,7 @@ then git rm -rf $dir/ && rm -rf $dir/ fi cp -R $GENERATED_DOC_DIR $dir -git config user.email "olivier.grisel+sklearn-ci@gmail.com" +git config user.email "ci@scikit-learn.org" git config user.name $USERNAME git config push.default matching git add -f $dir/ diff --git a/build_tools/circle/py39_conda_forge_linux-aarch64_conda.lock b/build_tools/circle/py39_conda_forge_linux-aarch64_conda.lock deleted file mode 100644 index 7a96250ccc682..0000000000000 --- a/build_tools/circle/py39_conda_forge_linux-aarch64_conda.lock +++ /dev/null @@ -1,89 +0,0 @@ -# Generated by conda-lock. 
-# platform: linux-aarch64 -# input_hash: 8cbd4b39fff3a0b91b6adc652e12de7b27aa74abb8b90e9d9aa0fc141dd28d84 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-aarch64/ca-certificates-2022.9.24-h4fd8a4c_0.tar.bz2#831557fcf92cfc4353eb69fb95524b6c -https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.39-h16cd69b_1.conda#9daf385ebefaea92087d3a315e398964 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-12.2.0-hf695500_19.tar.bz2#bc890809e1f807b51bf04dfbee70ddf5 -https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-12.2.0-hc13a102_19.tar.bz2#981741cd4321edd5c504b48f74fe91f2 -https://conda.anaconda.org/conda-forge/linux-aarch64/python_abi-3.9-3_cp39.conda#b6f330b045cf3425945d536a6b5cd240 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2022f-h191b570_0.tar.bz2#e366350e2343a798e29833286abe2560 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-ng-12.2.0-he9431aa_19.tar.bz2#b5b34211bbf681bd3e7a5a4d80cce77b -https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#98a1185182fec3c434069fa74e6473d6 -https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-12.2.0-h607ecd0_19.tar.bz2#8456a29b6d9fc3123ccb9a966b6b2c49 -https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-hf897c2e_4.tar.bz2#2d787570a729e273a4e75775ddf3348a -https://conda.anaconda.org/conda-forge/linux-aarch64/jpeg-9e-h9cdd2b7_2.tar.bz2#8fd15daa7515a0fea9b3b68495118238 -https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.0.0-h4de3ea5_0.tar.bz2#1a0ffc65e03ce81559dbcb0695ad1476 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.0.9-h4e544f5_8.tar.bz2#3cedc3935cfaa2a5303daa25fb12cb1d -https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.14-h4e544f5_0.tar.bz2#d98452637cbf62abad9140fa93365f94 -https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.2-h3557bc0_5.tar.bz2#dddd85f4d52121fab0a8b099c5e06501 -https://conda.anaconda.org/conda-forge/linux-aarch64/libhiredis-1.0.2-h05efe27_0.tar.bz2#a87f068744fd20334cd41489eb163bee -https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.0-hf897c2e_0.tar.bz2#36fdbc05c9d9145ece86f5a63c3f352e -https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.21-pthreads_h6cb6f83_3.tar.bz2#bc66302748a788c3bce59999ed6d737d -https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.32.1-hf897c2e_1000.tar.bz2#e038da5ef9095b0d79aac14a311394e7 -https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.2.4-h4e544f5_0.tar.bz2#9c307c3dba834b9529f6dcd95db543ed -https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.2.13-h4e544f5_4.tar.bz2#88596b6277fe6d39f046983aae6044db -https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.3-headf329_1.tar.bz2#486b68148e121bc8bbadc3cefae4c04f -https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.0.7-h4e544f5_0.tar.bz2#471ec2da6a894f9bf1d11141993ce8d0 -https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-hb9de7d4_1001.tar.bz2#d0183ec6ce0b5aaa3486df25fa5f0ded -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.9-h3557bc0_0.tar.bz2#e0c187f5ce240897762bbb89a8a407cc -https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.3-h3557bc0_0.tar.bz2#a6c9016ae1ca5c47a3603ed4cd65fedd -https://conda.anaconda.org/conda-forge/linux-aarch64/xz-5.2.6-h9cdd2b7_0.tar.bz2#83baad393a31d59c20b63ba4da6592df 
-https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-16_linuxaarch64_openblas.tar.bz2#188f02883567d5b7f96c7aa12e7007c9 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.0.9-h4e544f5_8.tar.bz2#319956380b383ec9f6a46d585599c028 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.0.9-h4e544f5_8.tar.bz2#56a0a025208af24e2b43b2bbeee79802 -https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.39-hf9034f9_0.conda#5ec9052384a6ac85e9111e9ac7c5ec4c -https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.40.0-hf9034f9_0.tar.bz2#9afb0d5dbaa403858a660cd0b4a31d29 -https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.13-h3557bc0_1004.tar.bz2#cc973f5f452272c397546eac588cddb3 -https://conda.anaconda.org/conda-forge/linux-aarch64/llvm-openmp-15.0.5-hb2805f8_0.tar.bz2#a201123d5e268610c8c8b73d5f3f0536 -https://conda.anaconda.org/conda-forge/linux-aarch64/openblas-0.3.21-pthreads_h2d9dd7e_3.tar.bz2#17a824cf9bbf0e31998d2c1a2140204c -https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.1.2-h38e3740_0.tar.bz2#3cdbfb7d7b63ae2c2d35bb167d257ecd -https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.12-hd8af866_0.tar.bz2#7894e82ff743bd96c76585ddebe28e2a -https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.2-hc1e27d5_4.tar.bz2#f5627b0fef9a5267fd4d2ad5d8b5c1b3 -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.0.9-h4e544f5_8.tar.bz2#0980429a0148a53edd0f1f207ec28a39 -https://conda.anaconda.org/conda-forge/linux-aarch64/ccache-4.7.3-hb064cd7_0.tar.bz2#8e71c7d1731d80d773cdafaa2ddcde50 -https://conda.anaconda.org/conda-forge/linux-aarch64/freetype-2.12.1-hbbbf32d_0.tar.bz2#3bfd4d79b5d93fa03f94e243d5f640d2 -https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-16_linuxaarch64_openblas.tar.bz2#520a3ecbebc63239c27dd6f70c2ababe -https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-16_linuxaarch64_openblas.tar.bz2#62990b2d1efc22d0beb394e893d39541 -https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.4.0-hacef7f3_4.tar.bz2#bf4778c9d0cf28b914a24d711b569335 -https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.9.15-hcd6f746_0_cpython.conda#4f20c6aad727bf0e2c9bb13a82f9a5fd -https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.0.9-h4e544f5_8.tar.bz2#259d82bd990ba225508389509634b157 -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-aarch64/cython-0.29.32-py39h3d8bfb9_1.tar.bz2#f2289027c1793dc348cb50d8a99a57b9 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.4-py39h110580c_1.tar.bz2#9c045502f6ab8c89bfda6be3c389e503 -https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.14-h5246980_0.tar.bz2#bc42d2aa9049730d4a75e7c6aa978f58 
-https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-16_linuxaarch64_openblas.tar.bz2#97743bccc8b7edec0b9a726a8b80ecdf -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.23.5-py39hf5a3166_0.conda#1edf973a9f7a53a7cace6bf41f3dd51d -https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.0-h9b6de37_1.tar.bz2#3638647a2b0a7aa92be687fcc500af60 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.2-py39hb9a1dbb_1.tar.bz2#f5f4671e5e76b582263699cb4ab3172c -https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-15.0.0-py39h0fd3b05_0.tar.bz2#835f1a9631e600e0176593e95e85f73f -https://conda.anaconda.org/conda-forge/noarch/wheel-0.38.4-pyhd8ed1ab_0.tar.bz2#c829cfb8cb826acb9de0ac1a2df0a940 -https://conda.anaconda.org/conda-forge/linux-aarch64/blas-devel-3.9.0-16_linuxaarch64_openblas.tar.bz2#5e5a376c40e95ab4b99519dfe6dc8912 -https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.0.6-py39hcdbe1fc_0.tar.bz2#825d87dfc6e062558494d09769b211de -https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.38.0-py39h0fd3b05_1.tar.bz2#c4eda904dc52f53c948d64d20662525f -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-9.2.0-py39hd8e725c_3.tar.bz2#b8984ef6c40a5e26472f07f18d910cc6 -https://conda.anaconda.org/conda-forge/noarch/pip-22.3.1-pyhd8ed1ab_0.tar.bz2#da66f2851b9836d3a7c5190082a45f7d -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.9.3-py39hc77f23a_2.tar.bz2#777bb5c46e3f56a96ceccf11c6332a60 -https://conda.anaconda.org/conda-forge/linux-aarch64/blas-2.116-openblas.tar.bz2#ded0db9695cd575ec1c68a68873363c5 -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.6.2-py39h15a8d8b_0.tar.bz2#b6d1b0f734ac62c1d737a9f297aef8de -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.6.2-py39ha65689a_0.tar.bz2#b4d712f422b5dad5259f38151be6f492 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e diff --git a/build_tools/cirrus/arm_tests.yml 
b/build_tools/cirrus/arm_tests.yml
new file mode 100644
index 0000000000000..09874e081b460
--- /dev/null
+++ b/build_tools/cirrus/arm_tests.yml
@@ -0,0 +1,34 @@
+linux_aarch64_test_task:
+  compute_engine_instance:
+    image_project: cirrus-images
+    image: family/docker-builder-arm64
+    architecture: arm64
+    platform: linux
+    cpu: 4
+    memory: 6G
+  env:
+    CONDA_ENV_NAME: testenv
+    LOCK_FILE: build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock
+    CONDA_PKGS_DIRS: /root/.conda/pkgs
+    HOME: /  # $HOME is not defined in image and is required to install mambaforge
+    # Upload tokens have been encrypted via the CirrusCI interface:
+    # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables
+    # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires.
+    BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f]
+  ccache_cache:
+    folder: /root/.cache/ccache
+  conda_cache:
+    folder: /root/.conda/pkgs
+    fingerprint_script: cat build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock
+
+  install_python_script: |
+    # Install python so that update_tracking_issue has access to a Python
+    apt install -y python3 python-is-python3
+
+  test_script: |
+    bash build_tools/cirrus/build_test_arm.sh
+    # On success, this script is run updating the issue.
+    bash build_tools/cirrus/update_tracking_issue.sh true
+
+  on_failure:
+    update_tracker_script: bash build_tools/cirrus/update_tracking_issue.sh false
diff --git a/build_tools/cirrus/arm_wheel.yml b/build_tools/cirrus/arm_wheel.yml
new file mode 100644
index 0000000000000..c3dfcfbc53ad9
--- /dev/null
+++ b/build_tools/cirrus/arm_wheel.yml
@@ -0,0 +1,76 @@
+linux_arm64_wheel_task:
+  compute_engine_instance:
+    image_project: cirrus-images
+    image: family/docker-builder-arm64
+    architecture: arm64
+    platform: linux
+    cpu: 4
+    memory: 4G
+  env:
+    CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1
+                      SKLEARN_BUILD_PARALLEL=5
+    CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh
+    CIBW_TEST_REQUIRES: pytest pandas threadpoolctl pytest-xdist
+    CIBW_BUILD_VERBOSITY: 1
+    # Upload tokens have been encrypted via the CirrusCI interface:
+    # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables
+    # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires.
+ BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] + matrix: + # Only the latest Python version is tested + - env: + CIBW_BUILD: cp39-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" + - env: + CIBW_BUILD: cp310-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" + - env: + CIBW_BUILD: cp311-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" + - env: + CIBW_BUILD: cp312-manylinux_aarch64 + + cibuildwheel_script: + - apt install -y python3 python-is-python3 + - bash build_tools/wheels/build_wheels.sh + + on_failure: + update_tracker_script: + - bash build_tools/cirrus/update_tracking_issue.sh false + + wheels_artifacts: + path: "wheelhouse/*" + +# Update tracker when all jobs are successful +update_tracker_success: + depends_on: + - linux_arm64_wheel + container: + image: python:3.11 + # Only update tracker for nightly builds + only_if: $CIRRUS_CRON == "nightly" + update_script: + - bash build_tools/cirrus/update_tracking_issue.sh true + +wheels_upload_task: + depends_on: + - linux_arm64_wheel + container: + image: continuumio/miniconda3:22.11.1 + # Artifacts are not uploaded on PRs + only_if: $CIRRUS_PR == "" + env: + # Upload tokens have been encrypted via the CirrusCI interface: + # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables + SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ENCRYPTED[9cf0529227577d503f2e19ef31cb690a2272cb243a217fb9a1ceda5cc608e8ccc292050fde9dca94cab766e1dd418519] + SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ENCRYPTED[8fade46af37fa645e57bd1ee21683337aa369ba56f6307ce13889f1e74df94e5bdd21d323baac21e332fd87b8949659a] + ARTIFACTS_PATH: wheelhouse + upload_script: | + conda install curl unzip -y + + # Download and show wheels + curl https://api.cirrus-ci.com/v1/artifact/build/$CIRRUS_BUILD_ID/wheels.zip --output wheels.zip + unzip wheels.zip + ls wheelhouse + + bash build_tools/github/upload_anaconda.sh diff --git a/build_tools/circle/build_test_arm.sh b/build_tools/cirrus/build_test_arm.sh similarity index 84% rename from build_tools/circle/build_test_arm.sh rename to build_tools/cirrus/build_test_arm.sh index 3b1979793f853..551dc3689e010 100755 --- a/build_tools/circle/build_test_arm.sh +++ b/build_tools/cirrus/build_test_arm.sh @@ -22,13 +22,13 @@ setup_ccache() { ccache -M 0 } -MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-aarch64.sh" +MAMBAFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-aarch64.sh" # Install Mambaforge -wget $MINICONDA_URL -O mambaforge.sh -MINICONDA_PATH=$HOME/miniconda -chmod +x mambaforge.sh && ./mambaforge.sh -b -p $MINICONDA_PATH -export PATH=$MINICONDA_PATH/bin:$PATH +curl -L --retry 10 $MAMBAFORGE_URL -o mambaforge.sh +MAMBAFORGE_PATH=$HOME/mambaforge +bash ./mambaforge.sh -b -p $MAMBAFORGE_PATH +export PATH=$MAMBAFORGE_PATH/bin:$PATH mamba init --all --verbose mamba update --yes mamba mamba update --yes conda diff --git a/build_tools/circle/py39_conda_forge_environment.yml b/build_tools/cirrus/pymin_conda_forge_environment.yml similarity index 84% rename from build_tools/circle/py39_conda_forge_environment.yml rename to build_tools/cirrus/pymin_conda_forge_environment.yml index a8fcfdeebf5f5..684c4636daad4 100644 --- a/build_tools/circle/py39_conda_forge_environment.yml +++ b/build_tools/cirrus/pymin_conda_forge_environment.yml @@ -12,8 +12,11 @@ dependencies: - joblib - threadpoolctl - matplotlib - - pytest - - pytest-xdist=2.5.0 + - pytest<8 + - pytest-xdist - pillow - pip + - ninja 
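+ # Note (editor, assumption): ninja and meson-python are listed here to build scikit-learn from source with the Meson build backend.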
+ - meson-python + - pip - ccache diff --git a/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock b/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock new file mode 100644 index 0000000000000..660bc9de9ecda --- /dev/null +++ b/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock @@ -0,0 +1,94 @@ +# Generated by conda-lock. +# platform: linux-aarch64 +# input_hash: 80459c6003cbcd22780a22a62ed5cc116e951d5c2c14602af1281434263b9138 +@EXPLICIT +https://conda.anaconda.org/conda-forge/linux-aarch64/ca-certificates-2024.2.2-hcefe29a_0.conda#57c226edb90c4e973b9b7503537dd339 +https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.40-hba4e955_0.conda#b55c1cb33c63d23b542fa53f24541e56 +https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-13.2.0-h3f4de04_7.conda#2a54872c7fab2db99b0074212d8efe64 +https://conda.anaconda.org/conda-forge/linux-aarch64/python_abi-3.9-4_cp39.conda#c191905a08694e4a5cb1238e90233878 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 +https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#98a1185182fec3c434069fa74e6473d6 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-13.2.0-he277a41_7.conda#01c5b27ce46f50abab2dc8454842c792 +https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h31becfc_5.conda#a64e35f01e0b7a2a152eca87d33b9c87 +https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.0.0-h4de3ea5_0.tar.bz2#1a0ffc65e03ce81559dbcb0695ad1476 +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.1.0-h31becfc_1.conda#1b219fd801eddb7a94df5bd001053ad9 +https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.20-h31becfc_0.conda#018592a3d691662f451f89d0de474a20 +https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.2-h3557bc0_5.tar.bz2#dddd85f4d52121fab0a8b099c5e06501 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-13.2.0-h87d9d71_7.conda#423eb7de085dd6b46928723edf5f8767 +https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-3.0.0-h31becfc_1.conda#ed24e702928be089d9ba3f05618515c6 +https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.1-h31becfc_0.conda#c14f32510f694e3185704d89967ec422 +https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda#000e30b09db0b7c775b21695dff30969 +https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.4.0-h31becfc_0.conda#5fd7ab3e5f382c70607fbac6335e6e19 +https://conda.anaconda.org/conda-forge/linux-aarch64/libxcrypt-4.4.36-h31becfc_1.conda#b4df5d7d4b63579d081fd3a4cf99740e +https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.2.13-h31becfc_5.conda#b213aa87eea9491ef7b129179322e955 +https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.5-h0425590_0.conda#38362af7bfac0efef69675acee564458 +https://conda.anaconda.org/conda-forge/linux-aarch64/ninja-1.12.1-h70be974_0.conda#216635cea46498d8045c7cf0f03eaf72 +https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.3.0-h31becfc_0.conda#36ca60a3afaf2ea2c460daeebd67430e +https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-hb9de7d4_1001.tar.bz2#d0183ec6ce0b5aaa3486df25fa5f0ded +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.11-h31becfc_0.conda#13de34f69cb73165dbe08c1e9148bedb +https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.3-h3557bc0_0.tar.bz2#a6c9016ae1ca5c47a3603ed4cd65fedd 
+https://conda.anaconda.org/conda-forge/linux-aarch64/xz-5.2.6-h9cdd2b7_0.tar.bz2#83baad393a31d59c20b63ba4da6592df +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.1.0-h31becfc_1.conda#8db7cff89510bec0b863a0a8ee6a7bce +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.1.0-h31becfc_1.conda#ad3d3a826b5848d99936e4466ebbaa26 +https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-ng-13.2.0-he9431aa_7.conda#d714db6ba9d67d55d21cf96316714ec8 +https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.43-h194ca79_0.conda#1123e504d9254dd9494267ab9aba95f0 +https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.45.3-h194ca79_0.conda#fb35b8afbe9e92467ac7b5608d60b775 +https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.15-h2a766a3_0.conda#eb3d8c8170e3d03f2564ed2024aa00c8 +https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8fc344f_1.conda#105eb1e16bf83bfb2eb380a48032b655 +https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.13-h194ca79_0.conda#f75105e0585851f818e0009dd1dde4dc +https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.6-h02f22dd_0.conda#be8d5f8cf21aed237b8b182ea86b3dd6 +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.1.0-h31becfc_1.conda#9e4a13596ab651ea8d77aae023d0ce3f +https://conda.anaconda.org/conda-forge/linux-aarch64/freetype-2.12.1-hf0a5ef3_2.conda#a5ab74c5bd158c3d5532b66d8d83d907 +https://conda.anaconda.org/conda-forge/linux-aarch64/libhiredis-1.0.2-h05efe27_0.tar.bz2#a87f068744fd20334cd41489eb163bee +https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.27-pthreads_h5a5ec62_0.conda#ffecca8f4f31cd50b92c0e6e6bfe4416 +https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.6.0-hf980d43_3.conda#b6f3abf5726ae33094bee238b4eb492f +https://conda.anaconda.org/conda-forge/linux-aarch64/llvm-openmp-18.1.5-h767c9be_0.conda#a9c2771c36671707f1992e4d0c32aa54 +https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.9.19-h4ac3b42_0_cpython.conda#1501507cd9451472ec8900d587ce872f +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.1.0-h31becfc_1.conda#e41f5862ac746428407f3fd44d2ed01f +https://conda.anaconda.org/conda-forge/linux-aarch64/ccache-4.9.1-h6552966_0.conda#758b202f61f6bbfd2c6adf0fde043276 +https://conda.anaconda.org/conda-forge/noarch/certifi-2024.2.2-pyhd8ed1ab_0.conda#0876280e409658fc6f9e75d035960333 +https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 +https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441 +https://conda.anaconda.org/conda-forge/linux-aarch64/cython-3.0.10-py39h387a81e_0.conda#0e917a89f77c978d152099357bd75b22 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa +https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46 +https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 +https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.5-py39had2cf8c_1.conda#ddb99610f7b950fdd5ff2aff19136363 +https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.16-h922389a_0.conda#ffdd8267a04c515e7ce69c727b051414 +https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-22_linuxaarch64_openblas.conda#068ab33f2382cda4dd0b72a715ad33b5 
+https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 +https://conda.anaconda.org/conda-forge/linux-aarch64/openblas-0.3.27-pthreads_h339cbfa_0.conda#cb06c34a3056f59e9e244c20836add8a +https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.2-h0d9d63b_0.conda#fd2898519e839d5ceb778343f39a3176 +https://conda.anaconda.org/conda-forge/noarch/packaging-24.0-pyhd8ed1ab_0.conda#248f521b64ce055e7feae3105e7abeb8 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.2-pyhd8ed1ab_0.conda#b9a4dacf97241704529131a0dfc0494f +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.5.1-pyhd8ed1ab_0.conda#7462280d81f639363e6e63c81276bd9e +https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd +https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 +https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.4-py39h7cc1d5f_0.conda#2c06a653ebfa389c18aea2d8f338df3b +https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-15.1.0-py39h898b7ef_0.conda#8c072c9329aeea97a46005625267a851 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.43.0-pyhd8ed1ab_1.conda#0b5293a157c2b5cd513dd1b03d8d3aae +https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a +https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.51.0-py39h898b7ef_0.conda#7b6a069c66a729454fb4c534ed145dcd +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda#c5d3907ad8bd7bf557521a1833cf7e6d +https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f +https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-22_linuxaarch64_openblas.conda#fbe7fe553f2cc78a0311e009b26f180d +https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-22_linuxaarch64_openblas.conda#8c709d281609792c39b1d5c0241f90f1 +https://conda.anaconda.org/conda-forge/noarch/meson-1.4.0-pyhd8ed1ab_0.conda#52a0660cfa40b45bf254ecc3374cb2e0 +https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-10.3.0-py39h71661b1_0.conda#dae548b7b537d7ef796d1d4c38a55319 +https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 +https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.4-pyhd8ed1ab_0.conda#a9d145de8c5f064b5fa68fb34725d9f4 +https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.0-pyhd8ed1ab_0.conda#dcbadab7a68738a028e195ab68ab2d2e +https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-22_linuxaarch64_openblas.conda#5acf669e0be669f30f4b813d2ecda7b8 +https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547 +https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.26.4-py39h91c28bb_0.conda#d88e195f11a9f27e649aea408b54cb48 
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.5.0-pyhd8ed1ab_0.conda#d5f595da2daead898ca958ac62f0307b +https://conda.anaconda.org/conda-forge/linux-aarch64/blas-devel-3.9.0-22_linuxaarch64_openblas.conda#a5b77b6c6807661afd716f33e85814b3 +https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.2.1-py39hd16970a_0.conda#66b9718539ecdd38876b0176c315bcad +https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.13.0-py39hb921187_1.conda#2717303c0d13a5646308b3763bf4daa4 +https://conda.anaconda.org/conda-forge/linux-aarch64/blas-2.122-openblas.conda#65bc48b3bc85f8eeeab54311443a83aa +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.8.4-py39h8e43113_0.conda#f397ddfe5c551732de61a92106a14cf3 +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.8.4-py39ha65689a_0.conda#d501bb96ff505fdd431fd8fdac8efbf9 diff --git a/build_tools/cirrus/update_tracking_issue.sh b/build_tools/cirrus/update_tracking_issue.sh new file mode 100644 index 0000000000000..9166210ac0007 --- /dev/null +++ b/build_tools/cirrus/update_tracking_issue.sh @@ -0,0 +1,22 @@ +# Update tracking issue if Cirrus fails nightly job + +if [[ "$CIRRUS_CRON" != "nightly" ]]; then + exit 0 +fi + +# TEST_PASSED is either "true" or "false" +TEST_PASSED="$1" + +python -m venv .venv +source .venv/bin/activate +python -m pip install defusedxml PyGithub + +LINK_TO_RUN="https://cirrus-ci.com/build/$CIRRUS_BUILD_ID" + +python maint_tools/update_tracking_issue.py \ + $BOT_GITHUB_TOKEN \ + $CIRRUS_TASK_NAME \ + $CIRRUS_REPO_FULL_NAME \ + $LINK_TO_RUN \ + --tests-passed $TEST_PASSED \ + --auto-close false diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index 02ec6d2713b80..483dc3739506e 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -6,12 +6,14 @@ The table should be updated for each new inclusion in the teams. Generating the table requires admin rights. """ -import sys -import requests + import getpass +import sys import time -from pathlib import Path from os import path +from pathlib import Path + +import requests print("user:", file=sys.stderr) user = input() @@ -42,17 +44,24 @@ def get_contributors(): """Get the list of contributor profiles. 
Require admin rights.""" # get core devs and contributor experience team core_devs = [] + documentation_team = [] contributor_experience_team = [] comm_team = [] core_devs_slug = "core-devs" contributor_experience_team_slug = "contributor-experience-team" comm_team_slug = "communication-team" + documentation_team_slug = "documentation-team" entry_point = "https://api.github.com/orgs/scikit-learn/" for team_slug, lst in zip( - (core_devs_slug, contributor_experience_team_slug, comm_team_slug), - (core_devs, contributor_experience_team, comm_team), + ( + core_devs_slug, + contributor_experience_team_slug, + comm_team_slug, + documentation_team_slug, + ), + (core_devs, contributor_experience_team, comm_team, documentation_team), ): for page in [1, 2]: # 30 per page reply = get(f"{entry_point}teams/{team_slug}/members?page={page}") @@ -66,6 +75,7 @@ def get_contributors(): # keep only the logins core_devs = set(c["login"] for c in core_devs) + documentation_team = set(c["login"] for c in documentation_team) contributor_experience_team = set(c["login"] for c in contributor_experience_team) comm_team = set(c["login"] for c in comm_team) members = set(c["login"] for c in members) @@ -75,16 +85,28 @@ def get_contributors(): # add missing contributors without GitHub accounts members |= {"Angel Soler Gollonet"} # remove CI bots - members -= {"sklearn-ci", "sklearn-wheels"} + members -= {"sklearn-ci", "sklearn-wheels", "sklearn-lgtm"} contributor_experience_team -= ( core_devs # remove ogrisel from contributor_experience_team ) - emeritus = members - core_devs - contributor_experience_team - comm_team + emeritus = ( + members + - core_devs + - contributor_experience_team + - comm_team + - documentation_team + ) # hard coded + emeritus_contributor_experience_team = { + "cmarmo", + } emeritus_comm_team = {"reshamas"} + # Up-to-now, we can subtract the team emeritus from the original emeritus + emeritus -= emeritus_contributor_experience_team | emeritus_comm_team + comm_team -= {"reshamas"} # in the comm team but not on the web page # get profiles from GitHub @@ -93,13 +115,21 @@ def get_contributors(): contributor_experience_team = [ get_profile(login) for login in contributor_experience_team ] + emeritus_contributor_experience_team = [ + get_profile(login) for login in emeritus_contributor_experience_team + ] comm_team = [get_profile(login) for login in comm_team] emeritus_comm_team = [get_profile(login) for login in emeritus_comm_team] + documentation_team = [get_profile(login) for login in documentation_team] # sort by last name core_devs = sorted(core_devs, key=key) emeritus = sorted(emeritus, key=key) contributor_experience_team = sorted(contributor_experience_team, key=key) + emeritus_contributor_experience_team = sorted( + emeritus_contributor_experience_team, key=key + ) + documentation_team = sorted(documentation_team, key=key) comm_team = sorted(comm_team, key=key) emeritus_comm_team = sorted(emeritus_comm_team, key=key) @@ -107,8 +137,10 @@ def get_contributors(): core_devs, emeritus, contributor_experience_team, + emeritus_contributor_experience_team, comm_team, emeritus_comm_team, + documentation_team, ) @@ -161,31 +193,34 @@ def generate_table(contributors): lines.append("

<p>%s</p>
" % (contributor["name"],)) lines.append(" ") lines.append(" ") - return "\n".join(lines) + return "\n".join(lines) + "\n" def generate_list(contributors): lines = [] for contributor in contributors: lines.append("- %s" % (contributor["name"],)) - return "\n".join(lines) + return "\n".join(lines) + "\n" if __name__ == "__main__": - ( core_devs, emeritus, contributor_experience_team, + emeritus_contributor_experience_team, comm_team, emeritus_comm_team, + documentation_team, ) = get_contributors() - with open(REPO_FOLDER / "doc" / "authors.rst", "w+", encoding="utf-8") as rst_file: + with open( + REPO_FOLDER / "doc" / "maintainers.rst", "w+", encoding="utf-8" + ) as rst_file: rst_file.write(generate_table(core_devs)) with open( - REPO_FOLDER / "doc" / "authors_emeritus.rst", "w+", encoding="utf-8" + REPO_FOLDER / "doc" / "maintainers_emeritus.rst", "w+", encoding="utf-8" ) as rst_file: rst_file.write(generate_list(emeritus)) @@ -194,6 +229,13 @@ def generate_list(contributors): ) as rst_file: rst_file.write(generate_table(contributor_experience_team)) + with open( + REPO_FOLDER / "doc" / "contributor_experience_team_emeritus.rst", + "w+", + encoding="utf-8", + ) as rst_file: + rst_file.write(generate_list(emeritus_contributor_experience_team)) + with open( REPO_FOLDER / "doc" / "communication_team.rst", "w+", encoding="utf-8" ) as rst_file: @@ -203,3 +245,8 @@ def generate_list(contributors): REPO_FOLDER / "doc" / "communication_team_emeritus.rst", "w+", encoding="utf-8" ) as rst_file: rst_file.write(generate_list(emeritus_comm_team)) + + with open( + REPO_FOLDER / "doc" / "documentation_team.rst", "w+", encoding="utf-8" + ) as rst_file: + rst_file.write(generate_table(documentation_team)) diff --git a/build_tools/get_comment.py b/build_tools/get_comment.py new file mode 100644 index 0000000000000..b357c68f23e3e --- /dev/null +++ b/build_tools/get_comment.py @@ -0,0 +1,356 @@ +# This script is used to generate a comment for a PR when linting issues are +# detected. It is used by the `Comment on failed linting` GitHub Action. +# This script fails if there are not comments to be posted. + +import os + +import requests + + +def get_versions(versions_file): + """Get the versions of the packages used in the linter job. + + Parameters + ---------- + versions_file : str + The path to the file that contains the versions of the packages. + + Returns + ------- + versions : dict + A dictionary with the versions of the packages. + """ + with open("versions.txt", "r") as f: + return dict(line.strip().split("=") for line in f) + + +def get_step_message(log, start, end, title, message, details): + """Get the message for a specific test. + + Parameters + ---------- + log : str + The log of the linting job. + + start : str + The string that marks the start of the test. + + end : str + The string that marks the end of the test. + + title : str + The title for this section. + + message : str + The message to be added at the beginning of the section. + + details : bool + Whether to add the details of each step. + + Returns + ------- + message : str + The message to be added to the comment. + """ + if end not in log: + return "" + res = ( + "-----------------------------------------------\n" + + f"### {title}\n\n" + + message + + "\n\n" + ) + if details: + res += ( + "
<details>\n\n```\n" + log[log.find(start) + len(start) + 1 : log.find(end) - 1] + "\n```\n\n</details>
\n\n" + ) + return res + + +def get_message(log_file, repo, pr_number, sha, run_id, details, versions): + with open(log_file, "r") as f: + log = f.read() + + sub_text = ( + "\n\n _Generated for commit:" + f" [{sha[:7]}](https://github.com/{repo}/pull/{pr_number}/commits/{sha}). " + "Link to the linter CI: [here]" + f"(https://github.com/{repo}/actions/runs/{run_id})_ " + ) + + if "### Linting completed ###" not in log: + return ( + "## ❌ Linting issues\n\n" + "There was an issue running the linter job. Please update with " + "`upstream/main` ([link](" + "https://scikit-learn.org/dev/developers/contributing.html" + "#how-to-contribute)) and push the changes. If you already have done " + "that, please send an empty commit with `git commit --allow-empty` " + "and push the changes to trigger the CI.\n\n" + sub_text + ) + + message = "" + + # black + message += get_step_message( + log, + start="### Running black ###", + end="Problems detected by black", + title="`black`", + message=( + "`black` detected issues. Please run `black .` locally and push " + "the changes. Here you can see the detected issues. Note that " + "running black might also fix some of the issues which might be " + "detected by `ruff`. Note that the installed `black` version is " + f"`black={versions['black']}`." + ), + details=details, + ) + + # ruff + message += get_step_message( + log, + start="### Running ruff ###", + end="Problems detected by ruff", + title="`ruff`", + message=( + "`ruff` detected issues. Please run " + "`ruff check --fix --output-format=full .` locally, fix the remaining " + "issues, and push the changes. Here you can see the detected issues. Note " + f"that the installed `ruff` version is `ruff={versions['ruff']}`." + ), + details=details, + ) + + # mypy + message += get_step_message( + log, + start="### Running mypy ###", + end="Problems detected by mypy", + title="`mypy`", + message=( + "`mypy` detected issues. Please fix them locally and push the changes. " + "Here you can see the detected issues. Note that the installed `mypy` " + f"version is `mypy={versions['mypy']}`." + ), + details=details, + ) + + # cython-lint + message += get_step_message( + log, + start="### Running cython-lint ###", + end="Problems detected by cython-lint", + title="`cython-lint`", + message=( + "`cython-lint` detected issues. Please fix them locally and push " + "the changes. Here you can see the detected issues. Note that the " + "installed `cython-lint` version is " + f"`cython-lint={versions['cython-lint']}`." + ), + details=details, + ) + + # deprecation order + message += get_step_message( + log, + start="### Checking for bad deprecation order ###", + end="Problems detected by deprecation order check", + title="Deprecation Order", + message=( + "Deprecation order check detected issues. Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + # doctest directives + message += get_step_message( + log, + start="### Checking for default doctest directives ###", + end="Problems detected by doctest directive check", + title="Doctest Directives", + message=( + "doctest directive check detected issues. Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + # joblib imports + message += get_step_message( + log, + start="### Checking for joblib imports ###", + end="Problems detected by joblib import check", + title="Joblib Imports", + message=( + "`joblib` import check detected issues. 
Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + if not message: + # no issues detected, so this script "fails" + return ( + "## ✔️ Linting Passed\n" + "All linting checks passed. Your pull request is in excellent shape! ☀️" + + sub_text + ) + + if not details: + # This happens if posting the log fails, which happens if the log is too + # long. Typically, this happens if the PR branch hasn't been updated + # since we've introduced import sorting. + branch_not_updated = ( + "_Merging with `upstream/main` might fix / improve the issues if you " + "haven't done that since 21.06.2023._\n\n" + ) + else: + branch_not_updated = "" + + message = ( + "## ❌ Linting issues\n\n" + + branch_not_updated + + "This PR is introducing linting issues. Here's a summary of the issues. " + + "Note that you can avoid having linting issues by enabling `pre-commit` " + + "hooks. Instructions to enable them can be found [here](" + + "https://scikit-learn.org/dev/developers/contributing.html#how-to-contribute)" + + ".\n\n" + + "You can see the details of the linting issues under the `lint` job [here]" + + f"(https://github.com/{repo}/actions/runs/{run_id})\n\n" + + message + + sub_text + ) + + return message + + +def get_headers(token): + """Get the headers for the GitHub API.""" + return { + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "X-GitHub-Api-Version": "2022-11-28", + } + + +def find_lint_bot_comments(repo, token, pr_number): + """Get the comment from the linting bot.""" + # repo is in the form of "org/repo" + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments # noqa + response = requests.get( + f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", + headers=get_headers(token), + ) + response.raise_for_status() + all_comments = response.json() + + failed_comment = "❌ Linting issues" + success_comment = "✔️ Linting Passed" + + # Find all comments that match the linting bot, and return the first one. + # There should always be only one such comment, or none, if the PR is + # just created. + comments = [ + comment + for comment in all_comments + if comment["user"]["login"] == "github-actions[bot]" + and (failed_comment in comment["body"] or success_comment in comment["body"]) + ] + + if len(all_comments) > 25 and not comments: + # By default the API returns the first 30 comments. If we can't find the + # comment created by the bot in those, then we raise and we skip creating + # a comment in the first place. 
+ raise RuntimeError("Comment not found in the first 30 comments.") + + return comments[0] if comments else None + + +def create_or_update_comment(comment, message, repo, pr_number, token): + """Create a new comment or update existing one.""" + # repo is in the form of "org/repo" + if comment is not None: + print("updating existing comment") + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment # noqa + response = requests.patch( + f"https://api.github.com/repos/{repo}/issues/comments/{comment['id']}", + headers=get_headers(token), + json={"body": message}, + ) + else: + print("creating new comment") + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment # noqa + response = requests.post( + f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", + headers=get_headers(token), + json={"body": message}, + ) + + response.raise_for_status() + + +if __name__ == "__main__": + repo = os.environ["GITHUB_REPOSITORY"] + token = os.environ["GITHUB_TOKEN"] + pr_number = os.environ["PR_NUMBER"] + sha = os.environ["BRANCH_SHA"] + log_file = os.environ["LOG_FILE"] + run_id = os.environ["RUN_ID"] + versions_file = os.environ["VERSIONS_FILE"] + + versions = get_versions(versions_file) + + if not repo or not token or not pr_number or not log_file or not run_id: + raise ValueError( + "One of the following environment variables is not set: " + "GITHUB_REPOSITORY, GITHUB_TOKEN, PR_NUMBER, LOG_FILE, RUN_ID" + ) + + try: + comment = find_lint_bot_comments(repo, token, pr_number) + except RuntimeError: + print("Comment not found in the first 30 comments. Skipping!") + exit(0) + + try: + message = get_message( + log_file, + repo=repo, + pr_number=pr_number, + sha=sha, + run_id=run_id, + details=True, + versions=versions, + ) + create_or_update_comment( + comment=comment, + message=message, + repo=repo, + pr_number=pr_number, + token=token, + ) + print(message) + except requests.HTTPError: + # The above fails if the message is too long. In that case, we + # try again without the details. 
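+ # GitHub caps issue comment bodies at roughly 65536 characters, which is what makes the first, detailed attempt fail.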
+ message = get_message( + log_file, + repo=repo, + pr_number=pr_number, + sha=sha, + run_id=run_id, + details=False, + versions=versions, + ) + create_or_update_comment( + comment=comment, + message=message, + repo=repo, + pr_number=pr_number, + token=token, + ) + print(message) diff --git a/build_tools/github/Windows b/build_tools/github/Windows index 5ba35f790ca5e..a9971aa525581 100644 --- a/build_tools/github/Windows +++ b/build_tools/github/Windows @@ -3,12 +3,10 @@ ARG PYTHON_VERSION FROM winamd64/python:$PYTHON_VERSION-windowsservercore ARG WHEEL_NAME -ARG CONFTEST_NAME ARG CIBW_TEST_REQUIRES # Copy and install the Windows wheel COPY $WHEEL_NAME $WHEEL_NAME -COPY $CONFTEST_NAME $CONFTEST_NAME RUN pip install $env:WHEEL_NAME # Install the testing dependencies diff --git a/build_tools/github/build_minimal_windows_image.sh b/build_tools/github/build_minimal_windows_image.sh index 4399bfa80704e..2995b6906c535 100755 --- a/build_tools/github/build_minimal_windows_image.sh +++ b/build_tools/github/build_minimal_windows_image.sh @@ -14,10 +14,12 @@ cp $WHEEL_PATH $WHEEL_NAME # Dot the Python version for identyfing the base Docker image PYTHON_VERSION=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2}) +if [[ "$CIBW_PRERELEASE_PYTHONS" == "True" ]]; then + PYTHON_VERSION="$PYTHON_VERSION-rc" +fi # Build a minimal Windows Docker image for testing the wheels docker build --build-arg PYTHON_VERSION=$PYTHON_VERSION \ --build-arg WHEEL_NAME=$WHEEL_NAME \ - --build-arg CONFTEST_NAME=$CONFTEST_NAME \ --build-arg CIBW_TEST_REQUIRES="$CIBW_TEST_REQUIRES" \ -f build_tools/github/Windows \ -t scikit-learn/minimal-windows . diff --git a/build_tools/github/build_source.sh b/build_tools/github/build_source.sh index a4d9c7bd05387..ec53284012fa4 100755 --- a/build_tools/github/build_source.sh +++ b/build_tools/github/build_source.sh @@ -11,10 +11,10 @@ python -m venv build_env source build_env/bin/activate python -m pip install numpy scipy cython -python -m pip install twine +python -m pip install twine build cd scikit-learn/scikit-learn -python setup.py sdist +python -m build --sdist # Check whether the source distribution will render correctly twine check dist/*.tar.gz diff --git a/build_tools/github/build_wheels.sh b/build_tools/github/build_wheels.sh deleted file mode 100755 index 647b47492774b..0000000000000 --- a/build_tools/github/build_wheels.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -set -e -set -x - -# OpenMP is not present on macOS by default -if [[ "$RUNNER_OS" == "macOS" ]]; then - # Make sure to use a libomp version binary compatible with the oldest - # supported version of the macos SDK as libomp will be vendored into the - # scikit-learn wheels for macos. - - if [[ "$CIBW_BUILD" == *-macosx_arm64 ]]; then - # arm64 builds must cross compile because CI is on x64 - export PYTHON_CROSSENV=1 - # SciPy requires 12.0 on arm to prevent kernel panics - # https://github.com/scipy/scipy/issues/14688 - # We use the same deployment target to match SciPy. 
- export MACOSX_DEPLOYMENT_TARGET=12.0 - OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2" - else - export MACOSX_DEPLOYMENT_TARGET=10.9 - OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2" - fi - - sudo conda create -n build $OPENMP_URL - PREFIX="/usr/local/miniconda/envs/build" - - export CC=/usr/bin/clang - export CXX=/usr/bin/clang++ - export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp" - export CFLAGS="$CFLAGS -I$PREFIX/include" - export CXXFLAGS="$CXXFLAGS -I$PREFIX/include" - export LDFLAGS="$LDFLAGS -Wl,-rpath,$PREFIX/lib -L$PREFIX/lib -lomp" -fi - -# The version of the built dependencies are specified -# in the pyproject.toml file, while the tests are run -# against the most recent version of the dependencies - -python -m pip install cibuildwheel -python -m cibuildwheel --output-dir wheelhouse diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index ef9bd77254fb5..5579d86c5ce3e 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -1,8 +1,10 @@ """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config.""" -import yaml -from pathlib import Path + import sys +from pathlib import Path + +import yaml gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml" with gh_wheel_path.open("r") as f: @@ -14,14 +16,12 @@ # plus one more for the sdist n_wheels += 1 -# aarch64 builds from travis -travis_config_path = Path.cwd() / ".travis.yml" -with travis_config_path.open("r") as f: - travis_config = yaml.safe_load(f) +# arm64 builds from cirrus +cirrus_path = Path.cwd() / "build_tools" / "cirrus" / "arm_wheel.yml" +with cirrus_path.open("r") as f: + cirrus_config = yaml.safe_load(f) -jobs = travis_config["jobs"]["include"] -travis_builds = [j for j in jobs if any("CIBW_BUILD" in env for env in j["env"])] -n_wheels += len(travis_builds) +n_wheels += len(cirrus_config["linux_arm64_wheel_task"]["matrix"]) dist_files = list(Path("dist").glob("**/*")) n_dist_files = len(dist_files) diff --git a/build_tools/github/doc_linux-64_conda.lock b/build_tools/github/doc_linux-64_conda.lock deleted file mode 100644 index afd5b30297635..0000000000000 --- a/build_tools/github/doc_linux-64_conda.lock +++ /dev/null @@ -1,235 +0,0 @@ -# Generated by conda-lock. 
-# platform: linux-64 -# input_hash: 9badce0c7156caf1e39ce0f87c6af2ee57af251763652d9bbe1d6f5828c62f6f -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.9.24-ha878542_0.tar.bz2#41e4e87062433e283696cf384f952ef6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 -https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb -https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_15.tar.bz2#5dd5127afd710f91f6a75821bac0a4f0 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.39-hcc3a1bd_1.conda#737be0d34c22d24432049ab7a3214de4 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-10.4.0-hd38fd1e_19.tar.bz2#b41d6540a78ba2518655eebcb0e41e20 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-12.2.0-h337968e_19.tar.bz2#164b4b1acaedc47ee7e658ae6b308ca3 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-10.4.0-hd38fd1e_19.tar.bz2#9367571bf3218f968a47c010618a9715 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-12.2.0-h46fd767_19.tar.bz2#1030b1f38c129f2634eae026f704fe60 -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-3_cp39.conda#0dd193187d54e585cac7eab942a8847e -https://conda.anaconda.org/conda-forge/noarch/tzdata-2022f-h191b570_0.tar.bz2#e366350e2343a798e29833286abe2560 -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-12.2.0-h69a702a_19.tar.bz2#cd7a806282c16e1f2d39a7e80d3a3e0d -https://conda.anaconda.org/conda-forge/linux-64/libgomp-12.2.0-h65d4601_19.tar.bz2#cedcee7c064c01c403f962c9e8d3c373 -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_15.tar.bz2#66c192522eacf5bb763568b4e415d133 -https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.39-he00db2b_1.conda#3d726e8b51a1f5bfd66892a2b7d9db2d -https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/binutils-2.39-hdd6e379_1.conda#1276c18b0a562739185dbf5bd14b57b2 -https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.39-h5fc0e48_11.tar.bz2#b7d26ab37be17ea4c366a97138684bcb -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-12.2.0-h65d4601_19.tar.bz2#e4c94f80aef025c17ab0828cd85ef535 -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.3.2-h166bdaf_0.tar.bz2#b7607b7b62dce55c194ad84f99464e5f -https://conda.anaconda.org/conda-forge/linux-64/aom-3.5.0-h27087fc_0.tar.bz2#a08150fd2298460cd1fcccf626305642 -https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.18.1-h7f98852_0.tar.bz2#f26ef8098fab1f719c91eb760d63381a 
-https://conda.anaconda.org/conda-forge/linux-64/charls-2.3.4-h9c3ff4c_0.tar.bz2#c3f85a96a52befc5e41cab1145c8d3c2 -https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.0.0-h166bdaf_1.tar.bz2#e890928299fe7242a108850fc0a5b7fc -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-h27087fc_0.tar.bz2#c4fbad8d4bddeb3c085f18cbf97fbfad -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.1-h36c2ea0_2.tar.bz2#626e68ae9cc5912d6adb79d318cf962d -https://conda.anaconda.org/conda-forge/linux-64/icu-69.1-h9c3ff4c_0.tar.bz2#e0773c9556d588b062a4e1424a6a02fa -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 -https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-h7f98852_2.tar.bz2#8e787b08fe19986d99d034b839df2961 -https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libaec-1.0.6-h9c3ff4c_0.tar.bz2#c77f5e4e418fa47d699d6afa54c5d444 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.14-h166bdaf_0.tar.bz2#fc84a0446e4e4fb882e78d786cfb9734 -https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-h516909a_1.tar.bz2#6f8720dff19e17ce5d48cfe7f3d2f0a3 -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 -https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.21-pthreads_h78a6416_3.tar.bz2#8c5963a49b6035c40646a763293fbb35 -https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-10.4.0-h5246dfb_19.tar.bz2#b068ad132a509367bc9e5a200a639429 -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.32.1-h7f98852_1000.tar.bz2#772d69f030955d9646d3d0eaf21d859d -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.4-h166bdaf_0.tar.bz2#ac2ccf7323d21f2994e4d1f5da664f37 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2 -https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.3-h9c3ff4c_1.tar.bz2#fbe97e8fa6f275d7c76a09e795adc3e6 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.3-h27087fc_1.tar.bz2#4acfc691e64342b9dae57cf2adc63238 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.32-h9c3ff4c_1.tar.bz2#29ded371806431b0499aaee146abfc3e -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.0.7-h166bdaf_0.tar.bz2#d1ad1824c71e67dea42f07e06cd177dc -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 
-https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.9-hbd366e4_2.tar.bz2#48018e187dacc6002d3ede9c824238ac -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.9-h7f98852_0.tar.bz2#bf6f803a544f26ebbdc3bfff272eb179 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae -https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.0-h27087fc_3.tar.bz2#0428af0510c3fafedf1c66b43102a34b -https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.0.6-h166bdaf_0.tar.bz2#8650e4fb44c4a618e5ab3e1e19607e32 -https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-10.4.0-h5231bdf_19.tar.bz2#a086547de4cee874e72d5a43230372ec -https://conda.anaconda.org/conda-forge/linux-64/libavif-0.11.1-h5cdd6b5_0.tar.bz2#2040f9067e8852606208cafa66c3563f -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_openblas.tar.bz2#d9b7a8639171f6c6fa0a983edabcfe2b -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.10-h28343ad_4.tar.bz2#4a049fc560e00e43151dc51368915fdd -https://conda.anaconda.org/conda-forge/linux-64/libllvm13-13.0.1-hf817b99_2.tar.bz2#47da3ce0d8b2e65ccb226c186dd91eba -https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.47.0-hff17c54_1.tar.bz2#2b7dbfa6988a41f9d23ba6d4f0e1d74e -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.40.0-h753d276_0.tar.bz2#2e5f9a37d487e1019fd4d8113adb2f9f -https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.10.0-hf14f497_3.tar.bz2#d85acad4b47dff4e3def14a769a97906 -https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-15.0.5-he0ac6c6_0.tar.bz2#5c4783b468153a1d8f33874c5bb55864 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.31-h26416b9_0.tar.bz2#6c531bc30d49ae75b9c7c7f65bd62e3c -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.21-pthreads_h320a7e8_3.tar.bz2#29155b9196b9d78022f11d86733e25a7 -https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b -https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h6239696_4.tar.bz2#adcf0be7897e73e312bd24353b613f74 -https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.1-h83bc5f7_3.tar.bz2#37baca23e60af4130cfc03e8ab9f8e22 
-https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.4.3-h7a311fb_0.tar.bz2#675c0a3103fd69380bda86cfddb0f3f4 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_0.tar.bz2#4e54cbfc47b8c74c2ecc1e7730d8edce -https://conda.anaconda.org/conda-forge/linux-64/gcc-10.4.0-hb92f740_11.tar.bz2#492fd2006232e01ddcf85994f3d9bdac -https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-10.4.0-h9215b83_11.tar.bz2#8ec7a24818e75cd2975e6fe785ad18eb -https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-10.4.0-h7d168d2_19.tar.bz2#2d598895087101a581a617221b815ec2 -https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-10.4.0-h5231bdf_19.tar.bz2#de8c00c5162b819c3e8a7f64ed32baf1 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.19.3-h08a2579_0.tar.bz2#d25e05e7ee0e302b52d24491db4891eb -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_openblas.tar.bz2#20bae26d0a1db73f758fc3754cab4719 -https://conda.anaconda.org/conda-forge/linux-64/libclang-13.0.1-default_hc23dcda_0.tar.bz2#8cebb0736cba83485b13dc10d242d96d -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.74.1-h606061b_1.tar.bz2#ed5349aa96776e00b34eccecf4a948fe -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_openblas.tar.bz2#955d993f41f9354bf753d29864ea20ad -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h55922b4_4.tar.bz2#901791f0ec7cddc8714e76e273013a91 -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.12-h885dcf4_1.tar.bz2#d1355eaa48f465782f228275a0a69771 -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.31-hbc51c84_0.tar.bz2#da9633eee814d4e910fe42643a356315 -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.15-hba424b6_0_cpython.conda#7b9485fce17fac2dd4aca6117a9936c2 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.40.0-h4ff8645_0.tar.bz2#bb11803129cbbb53ed56f9506ff74145 -https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.12-py_0.tar.bz2#2489a97287f90176ecdc3ca982b4b0a0 -https://conda.anaconda.org/conda-forge/noarch/appdirs-1.4.4-pyh9f0ad1d_0.tar.bz2#5f095bc6454094e96f146491fd03633b -https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.5.1-h166bdaf_0.tar.bz2#0667d7da14e682c9d07968601f6233ef -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-2.1.1-pyhd8ed1ab_0.tar.bz2#c1d5b294fbf9a795dec349a6f4d8be8e -https://conda.anaconda.org/conda-forge/noarch/click-8.1.3-unix_pyhd8ed1ab_2.tar.bz2#20e4087407c7cb04a40817114b333dbf -https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.2.0-pyhd8ed1ab_0.tar.bz2#a6cf47b09786423200d7982d1faa19eb -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.32-py39h5a03fae_1.tar.bz2#fb8cd95c2b97eaa8e6eba63021b41567 -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d 
-https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py39hf3d152e_1.tar.bz2#adb733ec2ee669f6d010758d054da60f -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.1-hc2a2eb6_0.tar.bz2#78415f0180a8d9c5bcc47889e00d5fb1 -https://conda.anaconda.org/conda-forge/noarch/fsspec-2022.11.0-pyhd8ed1ab_0.tar.bz2#eb919f2119a6db5d0192f9e9c3711572 -https://conda.anaconda.org/conda-forge/linux-64/gfortran-10.4.0-h0c96582_11.tar.bz2#9a22e19ae1d372f19a6514a4442f7917 -https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-10.4.0-h69d5af5_11.tar.bz2#7d42e71ff8a9f51b7a206ee35a742ce1 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.74.1-h6239696_1.tar.bz2#5f442e6bc9d89ba236eb25a25c5c2815 -https://conda.anaconda.org/conda-forge/linux-64/gxx-10.4.0-hb92f740_11.tar.bz2#a286961cd68f7d36f4ece4578042567c -https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-10.4.0-h6e491c6_11.tar.bz2#842f0029666e37e929cbd1e7614f5862 -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py39hf939315_1.tar.bz2#41679a052a8ce841c74df1ebc802e411 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.14-h6ed2654_0.tar.bz2#dcc588839de1445d90995a0a2c4f3a39 -https://conda.anaconda.org/conda-forge/linux-64/libcurl-7.86.0-h2283fc2_1.tar.bz2#fdca8cd67ec2676f90a70ac73a32538b -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_openblas.tar.bz2#823ceb5567e1a595deb643fcd17aed5a -https://conda.anaconda.org/conda-forge/linux-64/libpq-14.5-he2d8382_1.tar.bz2#c194811a2d160ef3210218ee508b6075 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.0.3-he3ba5ed_0.tar.bz2#f9dbabc7e01c459ed7a1d1d64b206e9b -https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.1-py39hb9d737c_2.tar.bz2#c678e07e7862b3157fb9f6d908233ffa -https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/noarch/networkx-2.8.8-pyhd8ed1ab_0.tar.bz2#bb45ff9deddb045331fd039949f39650 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.78-h2350873_0.tar.bz2#ab3df39f96742e6f1a9878b09274c1dc -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.23.5-py39h3d75532_0.conda#ea5d332e361eb72c2593cf79559bc0ec -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h7d73246_1.tar.bz2#a11b4df9271a8d7917686725aa04c8f2 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.4-py39hb9d737c_0.tar.bz2#12184951da572828fb986b06ffb63eed -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pycparser-2.21-pyhd8ed1ab_0.tar.bz2#076becd9e05608f8dc72757d5f3a91ff 
-https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-4.19.18-py39he80948d_8.tar.bz2#9dbac74c150d2542eca77c02da307168 -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/pytz-2022.6-pyhd8ed1ab_0.tar.bz2#b1f26ad83328e486910ef7f6e81dc061 -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0-py39hb9d737c_5.tar.bz2#ef9db3c38ae7275f6b14491cfe61a248 -https://conda.anaconda.org/conda-forge/noarch/setuptools-65.5.1-pyhd8ed1ab_0.tar.bz2#cfb8dc4d9d285ca5fb1177b9dd450e33 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.2-py_0.tar.bz2#20b2eaeaeea4ef9a9a0d99770620fd09 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.2-py_0.tar.bz2#68e01cac9d38d0e717cd5c87bc3d2cc9 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.0-pyhd8ed1ab_0.tar.bz2#77dad82eb9c8c1525ff7953e0756d708 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 -https://conda.anaconda.org/conda-forge/noarch/tenacity-8.1.0-pyhd8ed1ab_0.tar.bz2#97e6f26dd5b93c9f5e6142e16ee3af62 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py39hb9d737c_1.tar.bz2#8a7d309b08cff6386fe384aa10dd3748 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.4.0-pyha770c72_0.tar.bz2#2d93b130d148d7fc77e583677792fc6a -https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py39hb9d737c_0.tar.bz2#230d65004135bf312504a1bbcb0c7a08 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.38.4-pyhd8ed1ab_0.tar.bz2#c829cfb8cb826acb9de0ac1a2df0a940 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.10.0-pyhd8ed1ab_0.tar.bz2#cd4eb48ebde7de61f92252979aab515c -https://conda.anaconda.org/conda-forge/noarch/babel-2.11.0-pyhd8ed1ab_0.tar.bz2#2ea70fde8d581ba9425a761609eed6ba -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-16_linux64_openblas.tar.bz2#519562d6176dab9c2ab9a8336a14c8e7 -https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad -https://conda.anaconda.org/conda-forge/linux-64/cffi-1.15.1-py39he91dace_2.tar.bz2#fc70a133e8162f51e363cff3b6dc741c -https://conda.anaconda.org/conda-forge/linux-64/cfitsio-4.2.0-hd9d235c_0.conda#8c57a9adbafd87f5eff842abde599cb4 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.6-py39hf939315_0.tar.bz2#fb3f77fe25042c20c51974fcfe72f797 
-https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.5.1-h924138e_0.tar.bz2#45830a0730fee6c23551878c5f05a219 -https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.0-py39hb9d737c_1.tar.bz2#eb31327ace8dac15c2df243d9505a132 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.38.0-py39hb9d737c_1.tar.bz2#3f2d104f2fefdd5e8a205dd3aacbf1d7 -https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.5.1-h2a4ca65_0.tar.bz2#4851e61ed9676cee9e50136f2a373302 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.74.1-h6239696_1.tar.bz2#f3220a9e9d3abcbfca43419a219df7e4 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-5.0.0-pyha770c72_1.tar.bz2#ec069c4db6a0ad84107bac5da62819d2 -https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.2-pyhd8ed1ab_1.tar.bz2#c8490ed5c70966d232fdd389d0dbed37 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/noarch/partd-1.3.0-pyhd8ed1ab_0.tar.bz2#af8c82d121e63082926062d61d9abb54 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py39hf3a2cdf_3.tar.bz2#2bd111c38da69056e5fe25a51b832eba -https://conda.anaconda.org/conda-forge/noarch/pip-22.3.1-pyhd8ed1ab_0.tar.bz2#da66f2851b9836d3a7c5190082a45f7d -https://conda.anaconda.org/conda-forge/noarch/plotly-5.11.0-pyhd8ed1ab_0.tar.bz2#71aef86c572ad0ee49dba9af238d9c13 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.13.0-pyhd8ed1ab_0.tar.bz2#9f478e8eedd301008b5f395bad0caaed -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.3.0-py39h2ae25f5_2.tar.bz2#234ad9828eca1caf0f2fdcb4a24ad816 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.9.3-py39hddc5342_2.tar.bz2#0615ac8191c6ccf7d40860aff645f774 -https://conda.anaconda.org/conda-forge/linux-64/blas-2.116-openblas.tar.bz2#02f34bcf0aceb6fae4c4d1ecb71c852a -https://conda.anaconda.org/conda-forge/linux-64/brotlipy-0.7.0-py39hb9d737c_1005.tar.bz2#a639fdd9428d8b25f8326a3838d54045 -https://conda.anaconda.org/conda-forge/linux-64/compilers-1.5.1-ha770c72_0.tar.bz2#8a0ff3c519396696bbe9ca786606372f -https://conda.anaconda.org/conda-forge/linux-64/cryptography-38.0.3-py39h3ccb8fc_0.tar.bz2#64119cc315958472211288435368f1e5 -https://conda.anaconda.org/conda-forge/noarch/dask-core-2022.11.1-pyhd8ed1ab_0.conda#383ee12e7c9c27adab310a884bc359ab -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.20.3-hd4edc92_2.tar.bz2#153cfb02fb8be7dd7cabcbcb58a63053 -https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2022.9.26-py39hf32c164_4.conda#4bdbe7db90f8c77efb9eb8ef6417343d -https://conda.anaconda.org/conda-forge/noarch/imageio-2.22.4-pyhfa7a67d_0.conda#aa86d07656fd55578073e9980a6d7c07 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.6.2-py39hf9fd14e_0.tar.bz2#78ce32061e0be12deb8e0f11ffb76906 -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.5.2-py39h4661b88_0.conda#e17e50269c268d79478956a262a9fe13 -https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.3-pyhd8ed1ab_0.tar.bz2#50ef6b29b1fb0768ca82c5aeb4fb2d96 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39h7c9e3ff_2.tar.bz2#d2f1c4eed5ed41fb1bf3e905ccac0eb8 
-https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.20.2-hcf0ee16_0.tar.bz2#79d7fca692d224dc29a72bda90f78a7b -https://conda.anaconda.org/conda-forge/noarch/pyopenssl-22.1.0-pyhd8ed1ab_0.tar.bz2#fbfa0a180d48c800f922a10a114a8632 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.1-pyhd8ed1ab_0.tar.bz2#f87b94dc53178574eedd09c317c2318f -https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.13.5-py39h2ae25f5_2.tar.bz2#598b14b778a8f3e06a3579649f0e3c00 -https://conda.anaconda.org/conda-forge/noarch/tifffile-2022.10.10-pyhd8ed1ab_0.tar.bz2#1c126ff5b4643785bbc16e44e6327e41 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-5.12.9-h1304e3e_6.tar.bz2#f2985d160b8c43dd427923c04cd732fe -https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.19.3-py39h4661b88_2.tar.bz2#a8d53b12aedcd84107ba8c85c81be56f -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.1-hd8ed1ab_0.tar.bz2#b7e4c670752726d4991298fa0c581e97 -https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-impl-5.12.3-py39hde8b62d_8.tar.bz2#4863d6734a1bd7a86ac5ede53bf9b3c7 -https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_1.tar.bz2#089382ee0e2dc2eae33a04cc3c2bddb0 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.6.0-pyhd8ed1ab_0.tar.bz2#6429e1d1091c51f626b5dcfdd38bf429 -https://conda.anaconda.org/conda-forge/linux-64/pyqtchart-5.12-py39h0fcd23e_8.tar.bz2#d7d18728be87fdc0ddda3e65d41caa53 -https://conda.anaconda.org/conda-forge/linux-64/pyqtwebengine-5.12.1-py39h0fcd23e_8.tar.bz2#2098c2b2c9a38b43678a27819ff9433f -https://conda.anaconda.org/conda-forge/noarch/sphinx-5.3.0-pyhd8ed1ab_0.tar.bz2#f9e1fcfe235d655900bfeb6aee426472 -https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.5.0-pyhd8ed1ab_0.tar.bz2#3c275d7168a6a135329f4acb364c229a -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py39hf3d152e_8.tar.bz2#466425e3ee3b190e06b8a5a7098421aa -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.11.1-pyhd8ed1ab_0.tar.bz2#729254314a5d178eefca50acbc2687b8 -https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.6.2-py39hf3d152e_0.tar.bz2#03225b4745d1dee7bb19d81e41c773a0 -# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/d0/74/196f5da691be83ab02f8e9bd2c8acc2a3b0712da0a871f4aa2b7a023f90f/sphinxext_opengraph-0.7.3-py3-none-any.whl#sha256=edbfb21f1d31f572fc87a6ccc347cac502a3b8bb04c312bc2fa4888542f8505d diff --git a/build_tools/github/doc_min_dependencies_linux-64_conda.lock b/build_tools/github/doc_min_dependencies_linux-64_conda.lock deleted file mode 100644 index d5d233094e3c6..0000000000000 --- a/build_tools/github/doc_min_dependencies_linux-64_conda.lock +++ /dev/null @@ -1,170 +0,0 @@ -# Generated by conda-lock. 
-# platform: linux-64 -# input_hash: 980f5bade7f2b6355391f184da81979ecdbbc22d74d1c965c7bed1921e988107 -@EXPLICIT -https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2022.9.24-ha878542_0.tar.bz2#41e4e87062433e283696cf384f952ef6 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_15.tar.bz2#5dd5127afd710f91f6a75821bac0a4f0 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.36.1-hea4e1c9_2.tar.bz2#bd4f2e711b39af170e7ff15163fe87ee -https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-7.5.0-hda03d7c_20.tar.bz2#2146b25eb2a762a44fab709338a7b6d9 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran4-7.5.0-h14aa051_20.tar.bz2#a072eab836c3a9578ce72b5640ce592d -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-7.5.0-hb016644_20.tar.bz2#31d5500f621954679ee41d7f5d1089fb -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-12.2.0-h46fd767_19.tar.bz2#1030b1f38c129f2634eae026f704fe60 -https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.8-3_cp38.conda#2f3f7af062b42d664117662612022204 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-7.5.0-h14aa051_20.tar.bz2#c3b2ad091c043c08689e64b10741484b -https://conda.anaconda.org/conda-forge/linux-64/libgomp-12.2.0-h65d4601_19.tar.bz2#cedcee7c064c01c403f962c9e8d3c373 -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_15.tar.bz2#66c192522eacf5bb763568b4e415d133 -https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.36.1-h193b22a_2.tar.bz2#32aae4265554a47ea77f7c09f86aeb3b -https://conda.anaconda.org/conda-forge/linux-64/binutils-2.36.1-hdd6e379_2.tar.bz2#3111f86041b5b6863545ca49130cca95 -https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.36-hf3e587d_33.tar.bz2#72b245322c589284f1b92a5c971e5cb6 -https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-12.2.0-h65d4601_19.tar.bz2#e4c94f80aef025c17ab0828cd85ef535 -https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-h27087fc_0.tar.bz2#c4fbad8d4bddeb3c085f18cbf97fbfad -https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-7.5.0-habd7529_20.tar.bz2#42140612518a7ce78f571d64b6a50ba3 -https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 -https://conda.anaconda.org/conda-forge/linux-64/icu-64.2-he1b5a44_1.tar.bz2#8e881214a23508f1541eb7a3135d6fcb -https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h166bdaf_2.tar.bz2#ee8b844357a0946870901c7c6f418268 -https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.14-h166bdaf_0.tar.bz2#fc84a0446e4e4fb882e78d786cfb9734 -https://conda.anaconda.org/conda-forge/linux-64/libffi-3.2.1-he1b5a44_1007.tar.bz2#11389072d7d6036fd811c3d9460475cd -https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.32.1-h7f98852_1000.tar.bz2#772d69f030955d9646d3d0eaf21d859d -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.2.4-h166bdaf_0.tar.bz2#ac2ccf7323d21f2994e4d1f5da664f37 
-https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-0.10.0-he1b5a44_0.tar.bz2#78ccac2098edcd3673af2ceb3e95f932 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 -https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.3-h27087fc_1.tar.bz2#4acfc691e64342b9dae57cf2adc63238 -https://conda.anaconda.org/conda-forge/linux-64/nspr-4.32-h9c3ff4c_1.tar.bz2#29ded371806431b0499aaee146abfc3e -https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1s-h166bdaf_0.tar.bz2#e17553617ce05787d97715177be014d1 -https://conda.anaconda.org/conda-forge/linux-64/pcre-8.45-h9c3ff4c_0.tar.bz2#c05d1820a6d34ff07aaaab7a9b7eddaa -https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.9-h7f98852_0.tar.bz2#bf6f803a544f26ebbdc3bfff272eb179 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae -https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-7.5.0-h47867f9_33.tar.bz2#3a31c3f430a31184a5d07e67d3b24e2c -https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-7.5.0-h56cb351_20.tar.bz2#8f897b30195bd3a2251b4c51c3cc91cf -https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-7.5.0-hd0bb8aa_20.tar.bz2#dbe78fc5fb9c339f8e55426559e12f7b -https://conda.anaconda.org/conda-forge/linux-64/libllvm9-9.0.1-default_hc23dcda_7.tar.bz2#9f4686a2c319355fe8636ca13783c3b4 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.40.0-h753d276_0.tar.bz2#2e5f9a37d487e1019fd4d8113adb2f9f -https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-15.0.5-he0ac6c6_0.tar.bz2#5c4783b468153a1d8f33874c5bb55864 -https://conda.anaconda.org/conda-forge/linux-64/readline-8.1.2-h0f457ee_0.tar.bz2#db2ebbe2943aae81ed051a6a9af8e0fa -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h6239696_4.tar.bz2#adcf0be7897e73e312bd24353b613f74 -https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.1.1-h516909a_0.tar.bz2#d98aa4948ec35f52907e2d6152e2b255 -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_0.tar.bz2#4e54cbfc47b8c74c2ecc1e7730d8edce -https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-7.5.0-h78c8a43_33.tar.bz2#b2879010fb369f4012040f7a27657cd8 -https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-7.5.0-h555fc39_33.tar.bz2#5cf979793d2c5130a012cb6480867adc -https://conda.anaconda.org/conda-forge/linux-64/libclang-9.0.1-default_hb4e5071_5.tar.bz2#9dde69aa2a8ecd575a16e44987bdc9f7 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.66.3-hbe7bbb4_0.tar.bz2#d5a09a9e981849b751cb75656b7302a0 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.4.0-h55922b4_4.tar.bz2#901791f0ec7cddc8714e76e273013a91 
-https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.10-hee79883_0.tar.bz2#0217b0926808b1adf93247bba489d733 -https://conda.anaconda.org/conda-forge/linux-64/mkl-2020.4-h726a3e6_304.tar.bz2#b9b35a50e5377b19da6ec0709ae77fc3 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.40.0-h4ff8645_0.tar.bz2#bb11803129cbbb53ed56f9506ff74145 -https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.1.1-hc9558a2_0.tar.bz2#1eb7c67eb11eab0c98a87f84174fdde1 -https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.1-hc2a2eb6_0.tar.bz2#78415f0180a8d9c5bcc47889e00d5fb1 -https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.1.1-he991be0_0.tar.bz2#e38ac82cc517b9e245c1ae99f9f140da -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.14-h6ed2654_0.tar.bz2#dcc588839de1445d90995a0a2c4f3a39 -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.8.0-20_mkl.tar.bz2#8fbce60932c01d0e193a1a814f2002be -https://conda.anaconda.org/conda-forge/linux-64/nss-3.78-h2350873_0.tar.bz2#ab3df39f96742e6f1a9878b09274c1dc -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h7d73246_1.tar.bz2#a11b4df9271a8d7917686725aa04c8f2 -https://conda.anaconda.org/conda-forge/linux-64/python-3.8.6-h852b56e_0_cpython.tar.bz2#dd65401dfb61ac030edc0dc4d15c2c51 -https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.12-py_0.tar.bz2#2489a97287f90176ecdc3ca982b4b0a0 -https://conda.anaconda.org/conda-forge/noarch/appdirs-1.4.4-pyh9f0ad1d_0.tar.bz2#5f095bc6454094e96f146491fd03633b -https://conda.anaconda.org/conda-forge/noarch/attrs-22.1.0-pyh71513ae_1.tar.bz2#6d3ccbc56256204925bfa8378722792f -https://conda.anaconda.org/conda-forge/noarch/certifi-2022.9.24-pyhd8ed1ab_0.tar.bz2#f66309b099374af91369e67e84af397d -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-2.1.1-pyhd8ed1ab_0.tar.bz2#c1d5b294fbf9a795dec349a6f4d8be8e -https://conda.anaconda.org/conda-forge/noarch/click-8.1.3-unix_pyhd8ed1ab_2.tar.bz2#20e4087407c7cb04a40817114b333dbf -https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.2.0-pyhd8ed1ab_0.tar.bz2#a6cf47b09786423200d7982d1faa19eb -https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 -https://conda.anaconda.org/conda-forge/linux-64/compilers-1.1.1-0.tar.bz2#1ba267e19dbaf3db9dd0404e6fb9cdb9 -https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.24-py38h709712a_1.tar.bz2#9e5fe389471a13ae523ae980de4ad1f4 -https://conda.anaconda.org/conda-forge/linux-64/docutils-0.17.1-py38h578d9bd_3.tar.bz2#34e1f12e3ed15aff218644e9d865b722 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.0.4-pyhd8ed1ab_0.tar.bz2#e0734d1f12de77f9daca98bda3428733 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/fsspec-2022.11.0-pyhd8ed1ab_0.tar.bz2#eb919f2119a6db5d0192f9e9c3711572 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.66.3-h58526e2_0.tar.bz2#62c2e5c84f6cdc7ded2307ef9c30dc8c -https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed -https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 -https://conda.anaconda.org/conda-forge/noarch/iniconfig-1.1.1-pyh9f0ad1d_0.tar.bz2#39161f81cc5e5ca45b8226fbb06c6905 
-https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py38h43d8883_1.tar.bz2#41ca56d5cac7bfc7eb4fcdbee878eb84 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.8.0-20_mkl.tar.bz2#14b25490fdcc44e879ac6c10fe764f68 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.8.0-20_mkl.tar.bz2#52c0ae3606eeae7e1d493f37f336f4f5 -https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-1.1.1-py38h0a891b7_4.tar.bz2#d182e0c60439427453ed4a7abd28ef0d -https://conda.anaconda.org/conda-forge/noarch/networkx-2.8.8-pyhd8ed1ab_0.tar.bz2#bb45ff9deddb045331fd039949f39650 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.2.0-py38h9eb91d8_3.tar.bz2#61dc7b3140b7b79b1985b53d52726d74 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 -https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.4-py38h0a891b7_0.tar.bz2#fe2ef279417faa1af0adf178de2032f7 -https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pycparser-2.21-pyhd8ed1ab_0.tar.bz2#076becd9e05608f8dc72757d5f3a91ff -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/pytz-2022.6-pyhd8ed1ab_0.tar.bz2#b1f26ad83328e486910ef7f6e81dc061 -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0-py38h0a891b7_5.tar.bz2#0856c59f9ddb710c640dc0428d66b1b7 -https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py38h578d9bd_1.tar.bz2#da023e4a9c777abc28434d7a6473dcc2 -https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.2-py_0.tar.bz2#20b2eaeaeea4ef9a9a0d99770620fd09 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.2-py_0.tar.bz2#68e01cac9d38d0e717cd5c87bc3d2cc9 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.0-pyhd8ed1ab_0.tar.bz2#77dad82eb9c8c1525ff7953e0756d708 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 -https://conda.anaconda.org/conda-forge/noarch/tenacity-8.1.0-pyhd8ed1ab_0.tar.bz2#97e6f26dd5b93c9f5e6142e16ee3af62 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c -https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.2-py38h0a891b7_1.tar.bz2#358beb228a53b5e1031862de3525d1d3 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.4.0-pyha770c72_0.tar.bz2#2d93b130d148d7fc77e583677792fc6a 
-https://conda.anaconda.org/conda-forge/noarch/wheel-0.38.4-pyhd8ed1ab_0.tar.bz2#c829cfb8cb826acb9de0ac1a2df0a940 -https://conda.anaconda.org/conda-forge/noarch/babel-2.11.0-pyhd8ed1ab_0.tar.bz2#2ea70fde8d581ba9425a761609eed6ba -https://conda.anaconda.org/conda-forge/linux-64/cffi-1.14.4-py38ha312104_0.tar.bz2#8f82b87522fbb1d4b24e8b5e2b1d0501 -https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.0-py38h0a891b7_1.tar.bz2#183f6160ab3498b882e903b06be7d430 -https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-hfdff14a_1.tar.bz2#4caaca6356992ee545080c7d7193b5a3 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.14.5-h36ae1b5_2.tar.bz2#00084ab2657be5bf0ba0757ccde797ef -https://conda.anaconda.org/conda-forge/noarch/jinja2-2.11.3-pyhd8ed1ab_2.tar.bz2#bdedf6199eec03402a0c5db1f25e891e -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.8.0-20_mkl.tar.bz2#8274dc30518af9df1de47f5d9e73165c -https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.3-py38h95a1406_0.tar.bz2#bc0cbf611fe2f86eab29b98e51404f5e -https://conda.anaconda.org/conda-forge/noarch/packaging-21.3-pyhd8ed1ab_0.tar.bz2#71f1ab2de48613876becddd496371c85 -https://conda.anaconda.org/conda-forge/noarch/partd-1.3.0-pyhd8ed1ab_0.tar.bz2#af8c82d121e63082926062d61d9abb54 -https://conda.anaconda.org/conda-forge/noarch/pip-22.3.1-pyhd8ed1ab_0.tar.bz2#da66f2851b9836d3a7c5190082a45f7d -https://conda.anaconda.org/conda-forge/noarch/plotly-5.10.0-pyhd8ed1ab_0.tar.bz2#e95502aa0f8e3db05d198214472575de -https://conda.anaconda.org/conda-forge/noarch/pygments-2.13.0-pyhd8ed1ab_0.tar.bz2#9f478e8eedd301008b5f395bad0caaed -https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/blas-2.20-mkl.tar.bz2#e7d09a07f5413e53dca5282b8fa50bed -https://conda.anaconda.org/conda-forge/linux-64/brotlipy-0.7.0-py38h0a891b7_1005.tar.bz2#e99e08812dfff30fdd17b3f8838e2759 -https://conda.anaconda.org/conda-forge/linux-64/cryptography-38.0.3-py38h2b5fc30_0.tar.bz2#218274e4a04630a977b4da2b45eff593 -https://conda.anaconda.org/conda-forge/noarch/dask-core-2022.11.1-pyhd8ed1ab_0.conda#383ee12e7c9c27adab310a884bc359ab -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.14.5-h0935bb2_2.tar.bz2#eb125ee86480e00a4a1ed45a577c3311 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.22.4-pyhfa7a67d_0.conda#aa86d07656fd55578073e9980a6d7c07 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.1.3-py38h250f245_0.tar.bz2#eb182969d8ed019d4de6939f393270d2 -https://conda.anaconda.org/conda-forge/linux-64/pandas-1.0.5-py38hcb8c335_0.tar.bz2#1e1b4382170fd26cf722ef008ffb651e -https://conda.anaconda.org/conda-forge/noarch/pytest-7.2.0-pyhd8ed1ab_2.tar.bz2#ac82c7aebc282e6ac0450fca012ca78c -https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.1.1-py38h5c078b8_3.tar.bz2#dafeef887e68bd18ec84681747ca0fd5 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.3.2-py38h921218d_0.tar.bz2#278670dc2fef5a6309d1635f047bd456 -https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.3-pyhd8ed1ab_0.tar.bz2#50ef6b29b1fb0768ca82c5aeb4fb2d96 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.0.0-py38hf6732f7_1003.tar.bz2#44e00bf7a4b6a564e9313181aaea2615 
-https://conda.anaconda.org/conda-forge/noarch/pyopenssl-22.1.0-pyhd8ed1ab_0.tar.bz2#fbfa0a180d48c800f922a10a114a8632 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.4.0-pyhd8ed1ab_1.tar.bz2#60958bab291681d9c3ba69e80f1434cf -https://conda.anaconda.org/conda-forge/linux-64/qt-5.12.5-hd8c4c69_1.tar.bz2#0e105d4afe0c3c81c4fbd9937ec4f359 -https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.16.2-py38hb3f55d8_0.tar.bz2#468b398fefac8884cd6e6513af66549b -https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.1-pyhd8ed1ab_0.tar.bz2#f87b94dc53178574eedd09c317c2318f -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38ha8c2ead_3.tar.bz2#242c206b0c30fdc4c18aea16f04c4262 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.12.2-py38h5c078b8_0.tar.bz2#33787719ad03d33cffc4e2e3ea82bc9e -https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.11-pyhd8ed1ab_0.tar.bz2#0738978569b10669bdef41c671252dd1 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.1.3-py38_0.tar.bz2#1992ab91bbff86ded8d99d1f488d8e8b -https://conda.anaconda.org/conda-forge/noarch/requests-2.28.1-pyhd8ed1ab_1.tar.bz2#089382ee0e2dc2eae33a04cc3c2bddb0 -https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.1-hd8ed1ab_0.tar.bz2#b7e4c670752726d4991298fa0c581e97 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.6.0-pyhd8ed1ab_0.tar.bz2#6429e1d1091c51f626b5dcfdd38bf429 -https://conda.anaconda.org/conda-forge/noarch/sphinx-4.0.1-pyh6c4a22f_2.tar.bz2#c203dcc46f262853ecbb9552c50d664e -https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.7.0-py_0.tar.bz2#80bad3f857ecc86a4ab73f3e57addd13 -https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc -# pip sphinxext-opengraph @ https://files.pythonhosted.org/packages/50/ac/c105ed3e0a00b14b28c0aa630935af858fd8a32affeff19574b16e2c6ae8/sphinxext_opengraph-0.4.2-py3-none-any.whl#sha256=a51f2604f9a5b6c0d25d3a88e694d5c02e20812dc0e482adf96c8628f9109357 diff --git a/build_tools/github/repair_windows_wheels.sh b/build_tools/github/repair_windows_wheels.sh index cdd0c0c79d8c4..8f51a34d4039b 100755 --- a/build_tools/github/repair_windows_wheels.sh +++ b/build_tools/github/repair_windows_wheels.sh @@ -8,6 +8,7 @@ DEST_DIR=$2 # By default, the Windows wheels are not repaired. 
# In this case, we need to vendor VCRUNTIME140.dll +pip install wheel wheel unpack "$WHEEL" WHEEL_DIRNAME=$(ls -d scikit_learn-*) python build_tools/github/vendor.py "$WHEEL_DIRNAME" diff --git a/build_tools/github/test_source.sh b/build_tools/github/test_source.sh index 3a65a657addec..c93d22a08e791 100755 --- a/build_tools/github/test_source.sh +++ b/build_tools/github/test_source.sh @@ -13,7 +13,6 @@ python -m pip install pytest pandas # Run the tests on the installed source distribution mkdir tmp_for_test -cp scikit-learn/scikit-learn/conftest.py tmp_for_test cd tmp_for_test pytest --pyargs sklearn diff --git a/build_tools/github/test_wheels.sh b/build_tools/github/test_wheels.sh deleted file mode 100755 index 1a984bc91dba8..0000000000000 --- a/build_tools/github/test_wheels.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -e -set -x - -if [[ "$OSTYPE" != "linux-gnu" ]]; then - # The Linux test environment is run in a Docker container and - # it is not possible to copy the test configuration file (yet) - cp $CONFTEST_PATH $CONFTEST_NAME -fi - -# Test that there are no links to system libraries in the -# threadpoolctl output section of the show_versions output: -python -c "import sklearn; sklearn.show_versions()" -pytest --pyargs sklearn diff --git a/build_tools/github/test_windows_wheels.sh b/build_tools/github/test_windows_wheels.sh index 43a1a283e652c..07954a7a91970 100755 --- a/build_tools/github/test_windows_wheels.sh +++ b/build_tools/github/test_windows_wheels.sh @@ -11,7 +11,5 @@ docker container run \ docker container run \ -e SKLEARN_SKIP_NETWORK_TESTS=1 \ - -e OMP_NUM_THREADS=2 \ - -e OPENBLAS_NUM_THREADS=2 \ --rm scikit-learn/minimal-windows \ powershell -Command "pytest --pyargs sklearn" diff --git a/build_tools/github/trigger_hosting.sh b/build_tools/github/trigger_hosting.sh deleted file mode 100755 index 2a8e28ff164ff..0000000000000 --- a/build_tools/github/trigger_hosting.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -set -e -set -x - -GITHUB_RUN_URL=https://nightly.link/$GITHUB_REPOSITORY/actions/runs/$RUN_ID - -if [ "$EVENT" == pull_request ] -then - PULL_REQUEST_NUMBER=$(curl \ - -H "Accept: application/vnd.github.v3+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - https://api.github.com/repos/$REPO_NAME/commits/$COMMIT_SHA/pulls 2>/dev/null \ - | jq '.[0].number') - - if [[ "$PULL_REQUEST_NUMBER" == "null" ]]; then - # The pull request is on the main (default) branch of the fork. The above API - # call is unable to get the PR number associated with the commit: - # https://docs.github.com/en/rest/commits/commits#list-pull-requests-associated-with-a-commit - # We fallback to the search API here. The search API is not used everytime - # because it has a lower rate limit. 
- PULL_REQUEST_NUMBER=$(curl \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - "https://api.github.com/search/issues?q=$COMMIT_SHA+repo:$GITHUB_REPOSITORY" 2>/dev/null \ - | jq '.items[0].number') - fi - - BRANCH=pull/$PULL_REQUEST_NUMBER/head -else - BRANCH=$HEAD_BRANCH -fi - -curl --request POST \ - --url https://circleci.com/api/v2/project/gh/$GITHUB_REPOSITORY/pipeline \ - --header "Circle-Token: $CIRCLE_CI_TOKEN" \ - --header "content-type: application/json" \ - --header "x-attribution-actor-id: github_actions" \ - --header "x-attribution-login: github_actions" \ - --data \{\"branch\":\"$BRANCH\",\"parameters\":\{\"GITHUB_RUN_URL\":\"$GITHUB_RUN_URL\"\}\} diff --git a/build_tools/github/upload_anaconda.sh b/build_tools/github/upload_anaconda.sh index 13e8420e3cc5a..5054b32a53c61 100755 --- a/build_tools/github/upload_anaconda.sh +++ b/build_tools/github/upload_anaconda.sh @@ -3,8 +3,9 @@ set -e set -x -if [ "$GITHUB_EVENT_NAME" == "schedule" ]; then - ANACONDA_ORG="scipy-wheels-nightly" +# Note: build_wheels.sh has the same branch (only for NumPy 2.0 transition) +if [[ "$GITHUB_EVENT_NAME" == "schedule" || "$CIRRUS_CRON" == "nightly" ]]; then + ANACONDA_ORG="scientific-python-nightly-wheels" ANACONDA_TOKEN="$SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN" else ANACONDA_ORG="scikit-learn-wheels-staging" @@ -18,5 +19,5 @@ source activate upload conda install -y anaconda-client # Force a replacement if the remote file already exists -anaconda -t $ANACONDA_TOKEN upload --force -u $ANACONDA_ORG dist/artifact/* +anaconda -t $ANACONDA_TOKEN upload --force -u $ANACONDA_ORG $ARTIFACTS_PATH/* echo "Index: https://pypi.anaconda.org/$ANACONDA_ORG/simple" diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py index 2997688423b84..28b44be3c9aa9 100644 --- a/build_tools/github/vendor.py +++ b/build_tools/github/vendor.py @@ -1,13 +1,11 @@ """Embed vcomp140.dll and msvcp140.dll.""" - import os import os.path as op import shutil import sys import textwrap - TARGET_FOLDER = op.join("sklearn", ".libs") DISTRIBUTOR_INIT = op.join("sklearn", "_distributor_init.py") VCOMP140_SRC_PATH = "C:\\Windows\\System32\\vcomp140.dll" diff --git a/build_tools/linting.sh b/build_tools/linting.sh new file mode 100755 index 0000000000000..aefabfae7b3f5 --- /dev/null +++ b/build_tools/linting.sh @@ -0,0 +1,125 @@ +#!/bin/bash + +# Note that any change in this file, adding or removing steps or changing the +# printed messages, should be also reflected in the `get_comment.py` file. + +# This script shouldn't exit if a command / pipeline fails +set +e +# pipefail is necessary to propagate exit codes +set -o pipefail + +global_status=0 + +echo -e "### Running black ###\n" +black --check --diff . +status=$? + +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by black\n" +else + echo -e "Problems detected by black, please run black and commit the result\n" + global_status=1 +fi + +echo -e "### Running ruff ###\n" +ruff check --output-format=full . +status=$? +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by ruff\n" +else + echo -e "Problems detected by ruff, please fix them\n" + global_status=1 +fi + +echo -e "### Running mypy ###\n" +mypy sklearn/ +status=$? +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by mypy\n" +else + echo -e "Problems detected by mypy, please fix them\n" + global_status=1 +fi + +echo -e "### Running cython-lint ###\n" +cython-lint sklearn/ +status=$? 
+if [[ $status -eq 0 ]] +then + echo -e "No problem detected by cython-lint\n" +else + echo -e "Problems detected by cython-lint, please fix them\n" + global_status=1 +fi + +# For docstrings and warnings of deprecated attributes to be rendered +# properly, the `deprecated` decorator must come before the `property` decorator +# (else they are treated as functions) + +echo -e "### Checking for bad deprecation order ###\n" +bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated"` + +if [ ! -z "$bad_deprecation_property_order" ] +then + echo "deprecated decorator should come before property decorator" + echo "found the following occurrences:" + echo $bad_deprecation_property_order + echo -e "\nProblems detected by deprecation order check\n" + global_status=1 +else + echo -e "No problems detected related to deprecation order\n" +fi + +# Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE + +echo -e "### Checking for default doctest directives ###\n" +doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")" + +if [ ! -z "$doctest_directive" ] +then + echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:" + echo "$doctest_directive" + echo -e "\nProblems detected by doctest directive check\n" + global_status=1 +else + echo -e "No problems detected related to doctest directives\n" +fi + +# Check for joblib.delayed and joblib.Parallel imports +# TODO(1.7): remove ":!sklearn/utils/_joblib.py" +echo -e "### Checking for joblib imports ###\n" +joblib_status=0 +joblib_delayed_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")" +if [ ! -z "$joblib_delayed_import" ]; then + echo "Use from sklearn.utils.parallel import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:" + echo "$joblib_delayed_import" + joblib_status=1 +fi +joblib_Parallel_import="$(git grep -l -A 10 -E "joblib import.+Parallel" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")" +if [ ! -z "$joblib_Parallel_import" ]; then + echo "Use from sklearn.utils.parallel import Parallel instead of joblib Parallel. The following files contains imports to joblib.Parallel:" + echo "$joblib_Parallel_import" + joblib_status=1 +fi + +if [[ $joblib_status -eq 0 ]] +then + echo -e "No problems detected related to joblib imports\n" +else + echo -e "\nProblems detected by joblib import check\n" + global_status=1 +fi + +echo -e "### Linting completed ###\n" + +if [[ $global_status -eq 1 ]] +then + echo -e "Linting failed\n" + exit 1 +else + echo -e "Linting passed\n" + exit 0 +fi diff --git a/build_tools/shared.sh b/build_tools/shared.sh index 29ce8b27a3810..4866c149d506f 100644 --- a/build_tools/shared.sh +++ b/build_tools/shared.sh @@ -25,3 +25,11 @@ show_installed_libraries(){ python -m pip list fi } + +activate_environment() { + if [[ "$DISTRIB" =~ ^conda.* ]]; then + source activate $VIRTUALENV + elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "debian-32" || "$DISTRIB" == "pip-nogil" ]]; then + source $VIRTUALENV/bin/activate + fi +} diff --git a/build_tools/travis/after_success.sh b/build_tools/travis/after_success.sh deleted file mode 100755 index a09a4013ed946..0000000000000 --- a/build_tools/travis/after_success.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -# This script is meant to be called by the "after_success" step -# defined in ".travis.yml". 
In particular, we upload the wheels -# of the ARM64 architecture for the continuous deployment jobs. - -set -e - -# The wheels cannot be uploaded on PRs -if [[ $BUILD_WHEEL == true && $TRAVIS_EVENT_TYPE != pull_request ]]; then - # Nightly upload token and staging upload token are set in - # Travis settings (originally generated at Anaconda cloud) - if [[ $TRAVIS_EVENT_TYPE == cron ]]; then - ANACONDA_ORG="scipy-wheels-nightly" - ANACONDA_TOKEN="$SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN" - else - ANACONDA_ORG="scikit-learn-wheels-staging" - ANACONDA_TOKEN="$SCIKIT_LEARN_STAGING_UPLOAD_TOKEN" - fi - - MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh" - wget $MINICONDA_URL -O miniconda.sh - MINICONDA_PATH=$HOME/miniconda - chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH - - # Install Python 3.8 because of a bug with Python 3.9 - export PATH=$MINICONDA_PATH/bin:$PATH - conda create -n upload -y python=3.8 - source activate upload - conda install -y anaconda-client - - # Force a replacement if the remote file already exists - anaconda -t $ANACONDA_TOKEN upload --force -u $ANACONDA_ORG wheelhouse/*.whl - echo "Index: https://pypi.anaconda.org/$ANACONDA_ORG/simple" -fi diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh deleted file mode 100755 index 178260c8dabcb..0000000000000 --- a/build_tools/travis/install.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -# This script is meant to be called by the "install" step -# defined in the ".travis.yml" file. In particular, it is -# important that we call to the right installation script. - -if [[ $BUILD_WHEEL == true ]]; then - source build_tools/travis/install_wheels.sh || travis_terminate 1 -else - source build_tools/travis/install_main.sh || travis_terminate 1 -fi diff --git a/build_tools/travis/install_main.sh b/build_tools/travis/install_main.sh deleted file mode 100755 index c0795139859bb..0000000000000 --- a/build_tools/travis/install_main.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# Travis clone "scikit-learn/scikit-learn" repository into -# a local repository. We use a cached directory with three -# scikit-learn repositories (one for each matrix entry for -# non continuous deployment jobs) from which we pull local -# Travis repository. This allows us to keep build artifact -# for GCC + Cython, and gain time. - -set -e - -echo "CPU Arch: $TRAVIS_CPU_ARCH." - -# Import "get_dep" -source build_tools/shared.sh - -echo "List files from cached directories." 
-echo "pip:" -ls $HOME/.cache/pip - -export CC=/usr/lib/ccache/gcc -export CXX=/usr/lib/ccache/g++ - -# Useful for debugging how ccache is used -# export CCACHE_LOGFILE=/tmp/ccache.log - -# 60MB are (more or less) used by .ccache, when -# compiling from scratch at the time of writing -ccache --max-size 100M --show-stats - -# Deactivate the default virtual environment -# to setup a conda-based environment instead -deactivate - -MINICONDA_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh" - -# Install Miniconda -wget $MINICONDA_URL -O miniconda.sh -MINICONDA_PATH=$HOME/miniconda -chmod +x miniconda.sh && ./miniconda.sh -b -p $MINICONDA_PATH -export PATH=$MINICONDA_PATH/bin:$PATH -conda update --yes conda - -# Create environment and install dependencies -conda create -n testenv --yes python=3.7 - -source activate testenv -conda install -y scipy numpy pandas cython -pip install joblib threadpoolctl - -pip install $(get_dep pytest $PYTEST_VERSION) pytest-xdist - -# Build scikit-learn in this script to collapse the -# verbose build output in the Travis output when it -# succeeds -python --version -python -c "import numpy; print(f'numpy {numpy.__version__}')" -python -c "import scipy; print(f'scipy {scipy.__version__}')" - -pip install -e . -python setup.py develop - -ccache --show-stats - -# Useful for debugging how ccache is used -# cat $CCACHE_LOGFILE diff --git a/build_tools/travis/install_wheels.sh b/build_tools/travis/install_wheels.sh deleted file mode 100755 index 0f6cdf256e71b..0000000000000 --- a/build_tools/travis/install_wheels.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -python -m pip install cibuildwheel || travis_terminate $? -python -m cibuildwheel --output-dir wheelhouse || travis_terminate $? diff --git a/build_tools/travis/script.sh b/build_tools/travis/script.sh deleted file mode 100755 index 6e8b7e3deaee1..0000000000000 --- a/build_tools/travis/script.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# This script is meant to be called by the "script" step defined -# in the ".travis.yml" file. While this step is forbidden by the -# continuous deployment jobs, we have to execute the scripts for -# testing the continuous integration jobs. 
- -if [[ $BUILD_WHEEL != true ]]; then - # This trick will make Travis terminate the continuation of the pipeline - bash build_tools/travis/test_script.sh || travis_terminate 1 - bash build_tools/travis/test_docs.sh || travis_terminate 1 -fi diff --git a/build_tools/travis/test_docs.sh b/build_tools/travis/test_docs.sh deleted file mode 100755 index 4907dee1c9789..0000000000000 --- a/build_tools/travis/test_docs.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -e - -if [[ $TRAVIS_CPU_ARCH != arm64 ]]; then - # Faster run of the documentation tests - PYTEST="pytest -n $CPU_COUNT" make test-doc -fi diff --git a/build_tools/travis/test_script.sh b/build_tools/travis/test_script.sh deleted file mode 100755 index 1551ed858d1a1..0000000000000 --- a/build_tools/travis/test_script.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -set -e - -python --version -python -c "import numpy; print(f'numpy {numpy.__version__}')" -python -c "import scipy; print(f'scipy {scipy.__version__}')" -python -c "\ -try: - import pandas - print(f'pandas {pandas.__version__}') -except ImportError: - pass -" -python -c "import joblib; print(f'{joblib.cpu_count()} CPUs')" -python -c "import platform; print(f'{platform.machine()}')" - -TEST_CMD="pytest --showlocals --durations=20 --pyargs" - -# Run the tests on the installed version -mkdir -p $TEST_DIR - -# Copy "setup.cfg" for the test settings -cp setup.cfg $TEST_DIR -cd $TEST_DIR - -if [[ $TRAVIS_CPU_ARCH == arm64 ]]; then - # Faster run of the source code tests - TEST_CMD="$TEST_CMD -n $CPU_COUNT" - - # Remove the option to test the docstring - sed -i -e 's/--doctest-modules//g' setup.cfg -fi - -if [[ -n $CHECK_WARNINGS ]]; then - TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning -Werror::numpy.VisibleDeprecationWarning" -fi - -$TEST_CMD sklearn diff --git a/build_tools/travis/test_wheels.sh b/build_tools/travis/test_wheels.sh deleted file mode 100755 index 11d4bd73cedd7..0000000000000 --- a/build_tools/travis/test_wheels.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -pip install --upgrade pip || travis_terminate $? -pip install pytest pytest-xdist || travis_terminate $? - -# Test that there are no links to system libraries in the threadpoolctl -# section of the show_versions output. -python -c "import sklearn; sklearn.show_versions()" || travis_terminate $? -python -m pytest -n $CPU_COUNT --pyargs sklearn || travis_terminate $? diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index 5ba06c6ae0614..86da119ec4547 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -5,8 +5,11 @@ Two scenarios where this script can be useful: - make sure that the latest versions of all the dependencies are used in the CI. - We can run this script regularly and open a PR with the changes to the lock - files. This workflow will eventually be automated with a bot in the future. + There is a scheduled workflow that does this, see + .github/workflows/update-lock-files.yml. It is still useful to run this + script when the automated PR fails and, for example, some packages need to + be pinned. You can add the pins to this script, run it, and open a PR with + the changes. - bump minimum dependencies in sklearn/_min_dependencies.py. Running this script will update both the CI environment files and associated lock files. You can then open a PR with the changes.
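A rough sketch of the workflow this docstring describes, assuming conda-lock and pip-tools are installed as required by the rest of the docstring and that commands are run from the repository root (the commit message below is purely illustrative):

# Hedged sketch: regenerate the CI environment and lock files after editing
# sklearn/_min_dependencies.py (or adding pins in this script), then commit
# the generated files so they can be proposed in a pull request.
python build_tools/update_environments_and_lock_files.py
git status build_tools/   # inspect the regenerated *_environment.yml and lock files
git add build_tools/
git commit -m "Regenerate CI environment and lock files"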
@@ -27,26 +30,31 @@ sklearn/_min_dependencies.py - pip-tools +To only update the environment and lock files for specific builds, you can use +the command line argument `--select-build` which will take a regex. For example, +to only update the documentation builds you can use: +`python build_tools/update_environments_and_lock_files.py --select-build doc` """ +import json +import logging import re import subprocess import sys -from pathlib import Path -import shlex -import json -import logging from importlib.metadata import version +from pathlib import Path import click - from jinja2 import Environment +from packaging.version import Version logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) handler = logging.StreamHandler() logger.addHandler(handler) +TRACE = logging.DEBUG - 5 + common_dependencies_without_coverage = [ "python", @@ -62,10 +70,12 @@ "pytest", "pytest-xdist", "pillow", + "pip", + "ninja", + "meson-python", ] common_dependencies = common_dependencies_without_coverage + [ - "codecov", "pytest-cov", "coverage", ] @@ -73,9 +83,10 @@ docstring_test_dependencies = ["sphinx", "numpydoc"] default_package_constraints = { - # XXX: pin pytest-xdist to workaround: - # https://github.com/pytest-dev/pytest-xdist/issues/840 - "pytest-xdist": "2.5.0", + # TODO: somehow pytest 8 does not seem to work with meson editable + # install. Exit code is 5, i.e. no test collected + # This would be fixed by https://github.com/mesonbuild/meson-python/pull/569 + "pytest": "<8", } @@ -83,161 +94,203 @@ def remove_from(alist, to_remove): return [each for each in alist if each not in to_remove] -conda_build_metadata_list = [ +build_metadata_list = [ { - "build_name": "pylatest_conda_forge_mkl_linux-64", + "name": "pylatest_conda_forge_mkl_linux-64", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies + ["ccache"], + "conda_dependencies": common_dependencies + + [ + "ccache", + "pytorch", + "pytorch-cpu", + "polars", + "pyarrow", + "array-api-compat", + "array-api-strict", + ], "package_constraints": { "blas": "[build=mkl]", + "pytorch": "1.13", }, }, { - "build_name": "pylatest_conda_forge_mkl_osx-64", + "name": "pylatest_conda_forge_mkl_osx-64", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "osx-64", "channel": "conda-forge", "conda_dependencies": common_dependencies - + ["ccache", "compilers", "llvm-openmp"], + + [ + "ccache", + "compilers", + "llvm-openmp", + ], "package_constraints": { "blas": "[build=mkl]", }, }, { - "build_name": "pylatest_conda_mkl_no_openmp", + "name": "pylatest_conda_mkl_no_openmp", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "osx-64", "channel": "defaults", - "conda_dependencies": common_dependencies + ["ccache"], - "package_constraints": { - "blas": "[build=mkl]", - # 2022-06-09 currently mamba install 1.23 and scipy 1.7 which - # should be compatible but actually are not. This pin can be - # removed when scipy 1.8 is available in conda defaults channel. - # For more details, see - # https://github.com/scikit-learn/scikit-learn/pull/24363#issuecomment-1236927660 - # and https://github.com/scipy/scipy/issues/16964 - "numpy": "1.22", - # XXX: coverage is temporary pinned to 6.2 because 6.3 is not - # fork-safe and 6.4 is not available yet (July 2022) in conda - # defaults channel. 
For more details, see: - # https://github.com/nedbat/coveragepy/issues/1310 - "coverage": "6.2", - }, - }, - { - "build_name": "pylatest_conda_forge_mkl_no_coverage", - "folder": "build_tools/azure", - "platform": "linux-64", - "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + ["ccache"], + "conda_dependencies": remove_from( + common_dependencies, ["cython", "threadpoolctl"] + ) + + ["ccache"], "package_constraints": { "blas": "[build=mkl]", + # scipy 1.12.x crashes on this platform (https://github.com/scipy/scipy/pull/20086) + # TODO: release scipy constraint when 1.13 is available in the "default" + # channel. + "scipy": "<1.12", }, + # TODO: put cython and threadpoolctl back to conda dependencies when required + # version is available on the main channel + "pip_dependencies": ["cython", "threadpoolctl"], }, { - "build_name": "py38_conda_defaults_openblas", + "name": "pymin_conda_defaults_openblas", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "defaults", - "conda_dependencies": common_dependencies + ["ccache"], + "conda_dependencies": remove_from( + common_dependencies, + ["pandas", "threadpoolctl", "pip", "ninja", "meson-python"], + ) + + ["ccache"], "package_constraints": { - "python": "3.8", + "python": "3.9", "blas": "[build=openblas]", - "numpy": "min", - "scipy": "min", + "numpy": "1.21", # the min version is not available on the defaults channel + "scipy": "1.7", # the min version has some low level crashes "matplotlib": "min", - "threadpoolctl": "2.2.0", - # XXX: coverage is temporary pinned to 6.2 because 6.3 is not - # fork-safe and 6.4 is not available yet (July 2022) in conda - # defaults channel. For more details, see: - # https://github.com/nedbat/coveragepy/issues/1310 - "coverage": "6.2", + "cython": "min", + "joblib": "min", + "threadpoolctl": "min", }, + # TODO: put pip dependencies back to conda dependencies when required + # version is available on the defaults channel. 
+ "pip_dependencies": ["threadpoolctl"], }, { - "build_name": "py38_conda_forge_openblas_ubuntu_2204", + "name": "pymin_conda_forge_openblas_ubuntu_2204", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + ["ccache"], - "package_constraints": {"python": "3.8", "blas": "[build=openblas]"}, + "conda_dependencies": ( + common_dependencies_without_coverage + + docstring_test_dependencies + + ["ccache"] + ), + "package_constraints": { + "python": "3.9", + "blas": "[build=openblas]", + }, }, { - "build_name": "pylatest_pip_openblas_pandas", + "name": "pylatest_pip_openblas_pandas", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "linux-64", "channel": "defaults", "conda_dependencies": ["python", "ccache"], - "pip_dependencies": remove_from(common_dependencies, ["python", "blas"]) - + docstring_test_dependencies - + ["lightgbm", "scikit-image"], + "pip_dependencies": ( + remove_from(common_dependencies, ["python", "blas", "pip"]) + + docstring_test_dependencies + + ["lightgbm", "scikit-image"] + ), "package_constraints": { "python": "3.9", }, }, { - "build_name": "pylatest_pip_scipy_dev", + "name": "pylatest_pip_scipy_dev", + "type": "conda", + "tag": "scipy-dev", "folder": "build_tools/azure", "platform": "linux-64", "channel": "defaults", "conda_dependencies": ["python", "ccache"], - "pip_dependencies": remove_from( - common_dependencies, - [ - "python", - "blas", - "matplotlib", - "pyamg", - # all the dependencies below have a development version - # installed in the CI, so they can be removed from the - # environment.yml - "numpy", - "scipy", - "pandas", - "cython", - "joblib", - "pillow", - ], - ) - + ["pooch"] - + docstring_test_dependencies - # python-dateutil is a dependency of pandas and pandas is removed from - # the environment.yml. Adding python-dateutil so it is pinned - + ["python-dateutil"], + "pip_dependencies": ( + remove_from( + common_dependencies, + [ + "python", + "blas", + "matplotlib", + "pyamg", + # all the dependencies below have a development version + # installed in the CI, so they can be removed from the + # environment.yml + "numpy", + "scipy", + "pandas", + "cython", + "joblib", + "pillow", + ], + ) + + ["pooch"] + + docstring_test_dependencies + # python-dateutil is a dependency of pandas and pandas is removed from + # the environment.yml. 
Adding python-dateutil so it is pinned + + ["python-dateutil"] + ), }, { - "build_name": "pypy3", + "name": "pypy3", + "type": "conda", + "tag": "pypy", "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": ["pypy", "python"] - + remove_from( - common_dependencies_without_coverage, ["python", "pandas", "pillow"] - ) - + ["ccache"], + "conda_dependencies": ( + ["pypy", "python"] + + remove_from( + common_dependencies_without_coverage, ["python", "pandas", "pillow"] + ) + + ["ccache"] + ), "package_constraints": { "blas": "[build=openblas]", "python": "3.9", }, }, { - "build_name": "py38_conda_forge_mkl", + "name": "pymin_conda_forge_mkl", + "type": "conda", + "tag": "main-ci", "folder": "build_tools/azure", "platform": "win-64", "channel": "conda-forge", "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) - + ["wheel", "pip"], + + [ + "wheel", + "pip", + ], "package_constraints": { - "python": "3.8", + "python": "3.9", "blas": "[build=mkl]", }, }, { - "build_name": "doc_min_dependencies", - "folder": "build_tools/github", + "name": "doc_min_dependencies", + "type": "conda", + "tag": "main-ci", + "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", "conda_dependencies": common_dependencies_without_coverage @@ -248,14 +301,16 @@ def remove_from(alist, to_remove): "compilers", "sphinx", "sphinx-gallery", + "sphinx-copybutton", "numpydoc", "sphinx-prompt", "plotly", + "polars", "pooch", ], "pip_dependencies": ["sphinxext-opengraph"], "package_constraints": { - "python": "3.8", + "python": "3.9", "numpy": "min", "scipy": "min", "matplotlib": "min", @@ -264,15 +319,19 @@ def remove_from(alist, to_remove): "sphinx": "min", "pandas": "min", "sphinx-gallery": "min", + "sphinx-copybutton": "min", "numpydoc": "min", "sphinx-prompt": "min", "sphinxext-opengraph": "min", "plotly": "min", + "polars": "min", }, }, { - "build_name": "doc", - "folder": "build_tools/github", + "name": "doc", + "type": "conda", + "tag": "main-ci", + "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", "conda_dependencies": common_dependencies_without_coverage @@ -283,19 +342,24 @@ def remove_from(alist, to_remove): "compilers", "sphinx", "sphinx-gallery", + "sphinx-copybutton", "numpydoc", "sphinx-prompt", "plotly", + "polars", "pooch", + "sphinxext-opengraph", ], - "pip_dependencies": ["sphinxext-opengraph"], + "pip_dependencies": ["jupyterlite-sphinx", "jupyterlite-pyodide-kernel"], "package_constraints": { "python": "3.9", }, }, { - "build_name": "py39_conda_forge", - "folder": "build_tools/circle", + "name": "pymin_conda_forge", + "type": "conda", + "tag": "arm", + "folder": "build_tools/cirrus", "platform": "linux-aarch64", "channel": "conda-forge", "conda_dependencies": remove_from( @@ -306,25 +370,35 @@ def remove_from(alist, to_remove): "python": "3.9", }, }, -] - - -pip_build_metadata_list = [ { - "build_name": "debian_atlas_32bit", + "name": "debian_atlas_32bit", + "type": "pip", + "tag": "main-ci", "folder": "build_tools/azure", - "pip_dependencies": ["cython", "joblib", "threadpoolctl", "pytest"], + "pip_dependencies": [ + "cython", + "joblib", + "threadpoolctl", + "pytest", + "pytest-cov", + "ninja", + "meson-python", + ], "package_constraints": { "joblib": "min", - "threadpoolctl": "2.2.0", + "threadpoolctl": "3.1.0", "pytest": "min", + "pytest-cov": "min", # no pytest-xdist because it causes issue on 32bit + "cython": "min", }, # same Python version as in debian-32 build 
"python_version": "3.9.2", }, { - "build_name": "ubuntu_atlas", + "name": "ubuntu_atlas", + "type": "pip", + "tag": "main-ci", "folder": "build_tools/azure", "pip_dependencies": [ "cython", @@ -332,18 +406,21 @@ def remove_from(alist, to_remove): "threadpoolctl", "pytest", "pytest-xdist", + "ninja", + "meson-python", ], - "package_constraints": {"joblib": "min", "threadpoolctl": "min"}, - # Ubuntu 20.04 has 3.8.2 but only 3.8.5 is available for osx-arm64 on - # conda-forge. Chosing 3.8.5 so that this script can be run locally on - # osx-arm64 machines. This should not matter for pining versions with - # pip-compile - "python_version": "3.8.5", + "package_constraints": { + "joblib": "min", + "threadpoolctl": "min", + "cython": "min", + }, + "python_version": "3.10.4", }, ] def execute_command(command_list): + logger.debug(" ".join(command_list)) proc = subprocess.Popen( command_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) @@ -360,6 +437,7 @@ def execute_command(command_list): "stdout:\n{}\n" "stderr:\n{}\n".format(proc.returncode, command_str, out, err) ) + logger.log(TRACE, out) return out @@ -418,9 +496,10 @@ def get_conda_environment_content(build_metadata): def write_conda_environment(build_metadata): content = get_conda_environment_content(build_metadata) - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] folder_path = Path(build_metadata["folder"]) output_path = folder_path / f"{build_name}_environment.yml" + logger.debug(output_path) output_path.write_text(content) @@ -430,17 +509,25 @@ def write_all_conda_environments(build_metadata_list): def conda_lock(environment_path, lock_file_path, platform): - command = ( - f"conda-lock lock --mamba --kind explicit --platform {platform} " - f"--file {environment_path} --filename-template {lock_file_path}" + execute_command( + [ + "conda-lock", + "lock", + "--mamba", + "--kind", + "explicit", + "--platform", + platform, + "--file", + str(environment_path), + "--filename-template", + str(lock_file_path), + ] ) - logger.debug("conda-lock command: %s", command) - execute_command(shlex.split(command)) - def create_conda_lock_file(build_metadata): - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] folder_path = Path(build_metadata["folder"]) environment_path = folder_path / f"{build_name}_environment.yml" platform = build_metadata["platform"] @@ -454,7 +541,7 @@ def create_conda_lock_file(build_metadata): def write_all_conda_lock_files(build_metadata_list): for build_metadata in build_metadata_list: - logger.info(build_metadata["build_name"]) + logger.info(f"# Locking dependencies for {build_metadata['name']}") create_conda_lock_file(build_metadata) @@ -472,28 +559,33 @@ def get_pip_requirements_content(build_metadata): def write_pip_requirements(build_metadata): - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] content = get_pip_requirements_content(build_metadata) folder_path = Path(build_metadata["folder"]) output_path = folder_path / f"{build_name}_requirements.txt" + logger.debug(output_path) output_path.write_text(content) def write_all_pip_requirements(build_metadata_list): for build_metadata in build_metadata_list: - logger.info(build_metadata["build_name"]) write_pip_requirements(build_metadata) def pip_compile(pip_compile_path, requirements_path, lock_file_path): - command = f"{pip_compile_path} --upgrade {requirements_path} -o {lock_file_path}" - - logger.debug("pip-compile command: %s", command) - execute_command(shlex.split(command)) 
+ execute_command( + [ + str(pip_compile_path), + "--upgrade", + str(requirements_path), + "-o", + str(lock_file_path), + ] + ) def write_pip_lock_file(build_metadata): - build_name = build_metadata["build_name"] + build_name = build_metadata["name"] python_version = build_metadata["python_version"] environment_name = f"pip-tools-python{python_version}" # To make sure that the Python used to create the pip lock file is the same @@ -501,13 +593,21 @@ def write_pip_lock_file(build_metadata): # create a conda environment with the correct Python version and # pip-compile and run pip-compile in this environment - command = ( - "conda create -c conda-forge -n" - f" pip-tools-python{python_version} python={python_version} pip-tools -y" + execute_command( + [ + "conda", + "create", + "-c", + "conda-forge", + "-n", + f"pip-tools-python{python_version}", + f"python={python_version}", + "pip-tools", + "-y", + ] ) - execute_command(shlex.split(command)) - json_output = execute_command(shlex.split("conda info --json")) + json_output = execute_command(["conda", "info", "--json"]) conda_info = json.loads(json_output) environment_folder = [ each for each in conda_info["envs"] if each.endswith(environment_name) @@ -523,6 +623,7 @@ def write_pip_lock_file(build_metadata): def write_all_pip_lock_files(build_metadata_list): for build_metadata in build_metadata_list: + logger.info(f"# Locking dependencies for {build_metadata['name']}") write_pip_lock_file(build_metadata) @@ -540,33 +641,105 @@ def check_conda_lock_version(): ) +def check_conda_version(): + # Avoid issues with glibc (https://github.com/conda/conda-lock/issues/292) + # or osx (https://github.com/conda/conda-lock/issues/408) virtual package. + # The glibc one has been fixed in conda 23.1.0 and the osx has been fixed + # in conda 23.7.0. + conda_info_output = execute_command(["conda", "info", "--json"]) + + conda_info = json.loads(conda_info_output) + conda_version = Version(conda_info["conda_version"]) + + if Version("22.9.0") < conda_version < Version("23.7"): + raise RuntimeError( + f"conda version should be <= 22.9.0 or >= 23.7 got: {conda_version}" + ) + + @click.command() @click.option( "--select-build", default="", - help="Regex to restrict the builds we want to update environment and lock files", + help=( + "Regex to filter the builds we want to update environment and lock files. By" + " default all the builds are selected." + ), ) -def main(select_build): +@click.option( + "--skip-build", + default=None, + help="Regex to skip some builds from the builds selected by --select-build", +) +@click.option( + "--select-tag", + default=None, + help=( + "Tag to filter the builds, e.g. 'main-ci' or 'scipy-dev'. " + "This is an additional filtering on top of --select-build." 
+ ), +) +@click.option( + "-v", + "--verbose", + is_flag=True, + help="Print commands executed by the script", +) +@click.option( + "-vv", + "--very-verbose", + is_flag=True, + help="Print output of commands executed by the script", +) +def main(select_build, skip_build, select_tag, verbose, very_verbose): + if verbose: + logger.setLevel(logging.DEBUG) + if very_verbose: + logger.setLevel(TRACE) + handler.setLevel(TRACE) check_conda_lock_version() + check_conda_version() + + filtered_build_metadata_list = [ + each for each in build_metadata_list if re.search(select_build, each["name"]) + ] + if select_tag is not None: + filtered_build_metadata_list = [ + each for each in build_metadata_list if each["tag"] == select_tag + ] + if skip_build is not None: + filtered_build_metadata_list = [ + each + for each in filtered_build_metadata_list + if not re.search(skip_build, each["name"]) + ] + + selected_build_info = "\n".join( + f" - {each['name']}, type: {each['type']}, tag: {each['tag']}" + for each in filtered_build_metadata_list + ) + selected_build_message = ( + f"# {len(filtered_build_metadata_list)} selected builds\n{selected_build_info}" + ) + logger.info(selected_build_message) + filtered_conda_build_metadata_list = [ - each - for each in conda_build_metadata_list - if re.search(select_build, each["build_name"]) + each for each in filtered_build_metadata_list if each["type"] == "conda" ] - logger.info("Writing conda environments") - write_all_conda_environments(filtered_conda_build_metadata_list) - logger.info("Writing conda lock files") - write_all_conda_lock_files(filtered_conda_build_metadata_list) + if filtered_conda_build_metadata_list: + logger.info("# Writing conda environments") + write_all_conda_environments(filtered_conda_build_metadata_list) + logger.info("# Writing conda lock files") + write_all_conda_lock_files(filtered_conda_build_metadata_list) filtered_pip_build_metadata_list = [ - each - for each in pip_build_metadata_list - if re.search(select_build, each["build_name"]) + each for each in filtered_build_metadata_list if each["type"] == "pip" ] - logger.info("Writing pip requirements") - write_all_pip_requirements(filtered_pip_build_metadata_list) - logger.info("Writing pip lock files") - write_all_pip_lock_files(filtered_pip_build_metadata_list) + if filtered_pip_build_metadata_list: + logger.info("# Writing pip requirements") + write_all_pip_requirements(filtered_pip_build_metadata_list) + logger.info("# Writing pip lock files") + write_all_pip_lock_files(filtered_pip_build_metadata_list) if __name__ == "__main__": diff --git a/build_tools/wheels/build_wheels.sh b/build_tools/wheels/build_wheels.sh new file mode 100755 index 0000000000000..d2df4e3936829 --- /dev/null +++ b/build_tools/wheels/build_wheels.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +set -e +set -x + +# Set environment variables to make our wheel build easier to reproduce byte +# for byte from source. See https://reproducible-builds.org/. The long term +# motivation would be to be able to detect supply chain attacks. +# +# In particular we set SOURCE_DATE_EPOCH to the commit date of the last commit. +# +# XXX: setting those environment variables is not enough. 
See the following +# issue for more details on what remains to do: +# https://github.com/scikit-learn/scikit-learn/issues/28151 +export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct) +export PYTHONHASHSEED=0 + +# OpenMP is not present on macOS by default +if [[ $(uname) == "Darwin" ]]; then + # Make sure to use a libomp version binary compatible with the oldest + # supported version of the macos SDK as libomp will be vendored into the + # scikit-learn wheels for macos. + + if [[ "$CIBW_BUILD" == *-macosx_arm64 ]]; then + if [[ $(uname -m) == "x86_64" ]]; then + # arm64 builds must cross compile because the CI instance is x86 + # This turns off the computation of the test program in + # sklearn/_build_utils/pre_build_helpers.py + export PYTHON_CROSSENV=1 + fi + # SciPy requires 12.0 on arm to prevent kernel panics + # https://github.com/scipy/scipy/issues/14688 + # We use the same deployment target to match SciPy. + export MACOSX_DEPLOYMENT_TARGET=12.0 + OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-arm64/llvm-openmp-11.1.0-hf3c4609_1.tar.bz2" + else + export MACOSX_DEPLOYMENT_TARGET=10.9 + OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2" + fi + + sudo conda create -n build $OPENMP_URL + PREFIX="$CONDA_HOME/envs/build" + + export CC=/usr/bin/clang + export CXX=/usr/bin/clang++ + export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp" + export CFLAGS="$CFLAGS -I$PREFIX/include" + export CXXFLAGS="$CXXFLAGS -I$PREFIX/include" + export LDFLAGS="$LDFLAGS -Wl,-rpath,$PREFIX/lib -L$PREFIX/lib -lomp" +fi + + +if [[ "$GITHUB_EVENT_NAME" == "schedule" || "$CIRRUS_CRON" == "nightly" ]]; then + # Nightly build: See also `../github/upload_anaconda.sh` (same branching). + # To help with NumPy 2.0 transition, ensure that we use the NumPy 2.0 + # nightlies. This lives on the edge and opts-in to all pre-releases. + # That could be an issue, in which case no-build-isolation and a targeted + # NumPy install may be necessary, instead. + export CIBW_BUILD_FRONTEND='pip; args: --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"' +fi + +# The version of the built dependencies are specified +# in the pyproject.toml file, while the tests are run +# against the most recent version of the dependencies + +python -m pip install cibuildwheel +python -m cibuildwheel --output-dir wheelhouse diff --git a/build_tools/wheels/test_wheels.sh b/build_tools/wheels/test_wheels.sh new file mode 100755 index 0000000000000..e8cdf4b3ea8a2 --- /dev/null +++ b/build_tools/wheels/test_wheels.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -e +set -x + +python -c "import joblib; print(f'Number of cores (physical): \ +{joblib.cpu_count()} ({joblib.cpu_count(only_physical_cores=True)})')" + +# Test that there are no links to system libraries in the +# threadpoolctl output section of the show_versions output: +python -c "import sklearn; sklearn.show_versions()" + +if pip show -qq pytest-xdist; then + XDIST_WORKERS=$(python -c "import joblib; print(joblib.cpu_count(only_physical_cores=True))") + pytest --pyargs sklearn -n $XDIST_WORKERS +else + pytest --pyargs sklearn +fi diff --git a/conftest.py b/conftest.py deleted file mode 100644 index e4e478d2d72d7..0000000000000 --- a/conftest.py +++ /dev/null @@ -1,6 +0,0 @@ -# Even if empty this file is useful so that when running from the root folder -# ./sklearn is added to sys.path by pytest. 
See -# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more -# details. For example, this allows to build extensions in place and run pytest -# doc/modules/clustering.rst and use sklearn from the local folder rather than -# the one from site-packages. diff --git a/doc/Makefile b/doc/Makefile index 02656feba0710..44f02585f6205 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,27 +2,30 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = -T SPHINXBUILD ?= sphinx-build PAPER = BUILDDIR = _build -# Disable multiple jobs on OSX -ifeq ($(shell uname), Darwin) - SPHINX_NUMJOBS ?= 1 -else - SPHINX_NUMJOBS ?= auto -endif - ifneq ($(EXAMPLES_PATTERN),) EXAMPLES_PATTERN_OPTS := -D sphinx_gallery_conf.filename_pattern="$(EXAMPLES_PATTERN)" endif +ifeq ($(CI), true) + # On CircleCI using -j2 does not seem to speed up the html-noplot build + SPHINX_NUMJOBS_NOPLOT_DEFAULT=1 +else ifeq ($(shell uname), Darwin) + # Avoid stalling issues on MacOS + SPHINX_NUMJOBS_NOPLOT_DEFAULT=1 +else + SPHINX_NUMJOBS_NOPLOT_DEFAULT=auto +endif + # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -T -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ - -j$(SPHINX_NUMJOBS) $(EXAMPLES_PATTERN_OPTS) . +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ + $(EXAMPLES_PATTERN_OPTS) . .PHONY: help clean html dirhtml ziphtml pickle json latex latexpdf changes linkcheck doctest optipng @@ -48,17 +51,27 @@ clean: -rm -rf generated/* -rm -rf modules/generated/ +# Default to SPHINX_NUMJOBS=1 for full documentation build. Using +# SPHINX_NUMJOBS!=1 may actually slow down the build, or cause weird issues in +# the CI (job stalling or EOFError), see +# https://github.com/scikit-learn/scikit-learn/pull/25836 or +# https://github.com/scikit-learn/scikit-learn/pull/25809 +html: SPHINX_NUMJOBS ?= 1 html: # These two lines make the build a bit more lengthy, and the # the embedding of images more robust rm -rf $(BUILDDIR)/html/_images #rm -rf _build/doctrees/ - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html/stable + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) -j$(SPHINX_NUMJOBS) $(BUILDDIR)/html/stable @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html/stable" +# Default to SPHINX_NUMJOBS=auto (except on MacOS and CI) since this makes +# html-noplot build faster +html-noplot: SPHINX_NUMJOBS ?= $(SPHINX_NUMJOBS_NOPLOT_DEFAULT) html-noplot: - $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html/stable + $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) -j$(SPHINX_NUMJOBS) \ + $(BUILDDIR)/html/stable @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html/stable." diff --git a/doc/README.md b/doc/README.md index 8cace706efd35..537ed85006006 100644 --- a/doc/README.md +++ b/doc/README.md @@ -1,6 +1,6 @@ # Documentation for scikit-learn This directory contains the full manual and website as displayed at -http://scikit-learn.org. See -http://scikit-learn.org/dev/developers/contributing.html#documentation for -detailed information about the documentation. +https://scikit-learn.org. See +https://scikit-learn.org/dev/developers/contributing.html#documentation for +detailed information about the documentation. 
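The Makefile changes above make the number of Sphinx jobs overridable per target: the full `html` build now defaults to a single job to avoid the stalling and EOFError issues linked in the comment, while `html-noplot` defaults to `auto` except on CI and macOS. As a minimal sketch (assuming a scikit-learn checkout with the documentation dependencies installed, run from the repository root; the exact invocations are illustrative), the default can be overridden on the make command line:

    make -C doc html SPHINX_NUMJOBS=4
    make -C doc html-noplot SPHINX_NUMJOBS=auto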
diff --git a/doc/about.rst b/doc/about.rst index 989b5f290bdc1..035bddb0ea4dc 100644 --- a/doc/about.rst +++ b/doc/about.rst @@ -22,25 +22,46 @@ Governance The decision making process and governance structure of scikit-learn is laid out in the :ref:`governance document `. -Authors -------- +.. The "author" anchors below is there to ensure that old html links (in + the form of "about.html#author" still work) + +.. _authors: + +The people behind scikit-learn +------------------------------- + +Scikit-learn is a community project, developed by a large group of +people, all across the world. A few teams, listed below, have central +roles, however a more complete list of contributors can be found `on +github +`__. -The following people are currently core contributors to scikit-learn's development -and maintenance: +Maintainers Team +................ -.. include:: authors.rst +The following people are currently maintainers, in charge of +consolidating scikit-learn's development and maintenance: + +.. include:: maintainers.rst Please do not email the authors directly to ask for assistance or report issues. Instead, please see `What's the best way to ask questions about scikit-learn -`_ +`_ in the FAQ. .. seealso:: :ref:`How you can contribute to the project ` +Documentation Team +.................. + +The following people help with documenting the project: + +.. include:: documentation_team.rst + Contributor Experience Team ---------------------------- +........................... The following people are active contributors who also help with :ref:`triaging issues `, PRs, and general @@ -49,7 +70,7 @@ maintenance: .. include:: contributor_experience_team.rst Communication Team ------------------- +.................. The following people help with :ref:`communication around scikit-learn `. @@ -63,7 +84,7 @@ Emeritus Core Developers The following people have been active contributors in the past, but are no longer active in the project: -.. include:: authors_emeritus.rst +.. include:: maintainers_emeritus.rst Emeritus Communication Team --------------------------- @@ -73,6 +94,13 @@ past, but no longer have communication responsibilities: .. include:: communication_team_emeritus.rst +Emeritus Contributor Experience Team +------------------------------------ + +The following people have been active in the contributor experience team in the +past: + +.. include:: contributor_experience_team_emeritus.rst .. _citing-scikit-learn: @@ -82,44 +110,44 @@ Citing scikit-learn If you use scikit-learn in a scientific publication, we would appreciate citations to the following paper: - `Scikit-learn: Machine Learning in Python - `_, Pedregosa - *et al.*, JMLR 12, pp. 2825-2830, 2011. +`Scikit-learn: Machine Learning in Python +`_, Pedregosa +*et al.*, JMLR 12, pp. 2825-2830, 2011. - Bibtex entry:: +Bibtex entry:: - @article{scikit-learn, - title={Scikit-learn: Machine Learning in {P}ython}, - author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. - and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. - and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and - Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, - journal={Journal of Machine Learning Research}, - volume={12}, - pages={2825--2830}, - year={2011} - } + @article{scikit-learn, + title={Scikit-learn: Machine Learning in {P}ython}, + author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. + and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. + and Weiss, R. 
and Dubourg, V. and Vanderplas, J. and Passos, A. and + Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011} + } If you want to cite scikit-learn for its API or design, you may also want to consider the following paper: - :arxiv:`API design for machine learning software: experiences from the scikit-learn - project <1309.0238>`, Buitinck *et al.*, 2013. +:arxiv:`API design for machine learning software: experiences from the scikit-learn +project <1309.0238>`, Buitinck *et al.*, 2013. - Bibtex entry:: +Bibtex entry:: - @inproceedings{sklearn_api, - author = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and - Fabian Pedregosa and Andreas Mueller and Olivier Grisel and - Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort - and Jaques Grobler and Robert Layton and Jake VanderPlas and - Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux}, - title = {{API} design for machine learning software: experiences from the scikit-learn - project}, - booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning}, - year = {2013}, - pages = {108--122}, - } + @inproceedings{sklearn_api, + author = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and + Fabian Pedregosa and Andreas Mueller and Olivier Grisel and + Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort + and Jaques Grobler and Robert Layton and Jake VanderPlas and + Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux}, + title = {{API} design for machine learning software: experiences from the scikit-learn + project}, + booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning}, + year = {2013}, + pages = {108--122}, + } Artwork ------- @@ -140,6 +168,34 @@ The project would like to thank the following funders. ................................... + +.. raw:: html + +
+
+ +`:probabl. `_ funds Adrin Jalali, Arturo Amor, +François Goupil, Guillaume Lemaitre, Jérémie du Boisberranger, Olivier Grisel, and +Stefanie Senger. + +.. raw:: html + +
+ +
+ +.. image:: images/probabl.png + :width: 75pt + :align: center + :target: https://probabl.ai + +.. raw:: html + +
+
+ +.......... + .. raw:: html
@@ -147,43 +203,39 @@ The project would like to thank the following funders. The `Members `_ of the `Scikit-Learn Consortium at Inria Foundation -`_ fund Olivier -Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger. +`_ help maintain and +improve the project through their financial support. .. raw:: html
-.. |msn| image:: images/microsoft.png - :width: 100pt - :target: https://www.microsoft.com/ - -.. |bcg| image:: images/bcg.png - :width: 100pt - :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx +.. |chanel| image:: images/chanel.png + :width: 55pt + :target: https://www.chanel.com .. |axa| image:: images/axa.png - :width: 50pt + :width: 40pt :target: https://www.axa.fr/ .. |bnp| image:: images/bnp.png - :width: 150pt + :width: 120pt :target: https://www.bnpparibascardif.com/ -.. |fujitsu| image:: images/fujitsu.png - :width: 100pt - :target: https://www.fujitsu.com/global/ - .. |dataiku| image:: images/dataiku.png - :width: 70pt + :width: 55pt :target: https://www.dataiku.com/ -.. |aphp| image:: images/logo_APHP_text.png - :width: 150pt - :target: https://aphp.fr/ +.. |hf| image:: images/huggingface_logo-noborder.png + :width: 55pt + :target: https://huggingface.co + +.. |nvidia| image:: images/nvidia.png + :width: 55pt + :target: https://www.nvidia.com .. |inria| image:: images/inria-logo.jpg - :width: 100pt + :width: 75pt :target: https://www.inria.fr @@ -192,27 +244,27 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger.
.. table:: - :class: sk-sponsor-table align-default + :class: sk-sponsor-table - +---------+----------+ - | |bcg| | - +---------+----------+ - | | - +---------+----------+ - | |axa| | |bnp| | - +---------+----------+ - ||fujitsu|| |msn| | - +---------+----------+ - | | - +---------+----------+ - | |dataiku| | - +---------+----------+ - | |aphp| | - +---------+----------+ - | | - +---------+----------+ - | |inria| | - +---------+----------+ + +----------+-----------+ + | |chanel| | + +----------+-----------+ + | | + +----------+-----------+ + | |axa| | |bnp| | + +----------+-----------+ + | | + +----------+-----------+ + | |nvidia| | |hf| | + +----------+-----------+ + | | + +----------+-----------+ + | |dataiku| | + +----------+-----------+ + | | + +----------+-----------+ + | |inria| | + +----------+-----------+ .. raw:: html @@ -226,7 +278,8 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger.
-`Hugging Face `_ funds Adrin Jalali since 2022. +`NVidia `_ funds Tim Head since 2022 +and is part of the scikit-learn consortium at Inria. .. raw:: html @@ -234,17 +287,17 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger.
-.. image:: images/huggingface_logo-noborder.png +.. image:: images/nvidia.png :width: 55pt :align: center - :target: https://huggingface.co/ + :target: https://nvidia.com .. raw:: html
-........... +.......... .. raw:: html @@ -276,7 +329,7 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger.
-`Quansight Labs `_ funds Thomas J. Fan since 2021. +`Quansight Labs `_ funds Lucy Liu since 2022. .. raw:: html @@ -294,9 +347,61 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger.
+........... + +.. raw:: html + +
+
+ +`Tidelift `_ supports the project via their service +agreement. + +.. raw:: html + +
+ +
+ +.. image:: images/Tidelift-logo-on-light.svg + :width: 100pt + :align: center + :target: https://tidelift.com/ + +.. raw:: html + +
+
+ Past Sponsors ............. +.. raw:: html + +
+
+ +`Quansight Labs `_ funded Meekail Zain in 2022 and 2023, and +funded Thomas J. Fan from 2021 to 2023. + +.. raw:: html + +
+ +
+ +.. image:: images/quansight-labs.png + :width: 100pt + :align: center + :target: https://labs.quansight.org + +.. raw:: html + +
+
+ +........... + .. raw:: html
@@ -559,6 +664,31 @@ The `NeuroDebian `_ project providing `Debian `Dr. James V. Haxby `_ (`Dartmouth College `_). +................... + +The following organizations funded the scikit-learn consortium at Inria in +the past: + +.. |msn| image:: images/microsoft.png + :width: 100pt + :target: https://www.microsoft.com/ + +.. |bcg| image:: images/bcg.png + :width: 100pt + :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx + +.. |fujitsu| image:: images/fujitsu.png + :width: 100pt + :target: https://www.fujitsu.com/global/ + +.. |aphp| image:: images/logo_APHP_text.png + :width: 150pt + :target: https://aphp.fr/ + + +|bcg| |msn| |fujitsu| |aphp| + + Sprints ------- @@ -619,7 +749,7 @@ Infrastructure support ---------------------- - We would also like to thank `Microsoft Azure - `_, `Travis Cl `_, + `_, `Cirrus Cl `_, `CircleCl `_ for free CPU time on their Continuous Integration servers, and `Anaconda Inc. `_ for the storage they provide for our staging and nightly builds. diff --git a/doc/common_pitfalls.rst b/doc/common_pitfalls.rst index 308edb4c67c79..41eb16665a612 100644 --- a/doc/common_pitfalls.rst +++ b/doc/common_pitfalls.rst @@ -104,6 +104,26 @@ be the average of the train subset, **not** the average of all the data. If the test subset is included in the average calculation, information from the test subset is influencing the model. +How to avoid data leakage +------------------------- + +Below are some tips on avoiding data leakage: + +* Always split the data into train and test subsets first, particularly + before any preprocessing steps. +* Never include test data when using the `fit` and `fit_transform` + methods. Using all the data, e.g., `fit(X)`, can result in overly optimistic + scores. + + Conversely, the `transform` method should be used on both train and test + subsets as the same preprocessing should be applied to all the data. + This can be achieved by using `fit_transform` on the train subset and + `transform` on the test subset. +* The scikit-learn :ref:`pipeline ` is a great way to prevent data + leakage as it ensures that the appropriate method is performed on the + correct data subset. The pipeline is ideal for use in cross-validation + and hyper-parameter tuning functions. + An example of data leakage during preprocessing is detailed below. Data leakage during pre-processing @@ -211,27 +231,8 @@ method is used during fitting and predicting:: >>> from sklearn.model_selection import cross_val_score >>> scores = cross_val_score(pipeline, X, y) >>> print(f"Mean accuracy: {scores.mean():.2f}+/-{scores.std():.2f}") - Mean accuracy: 0.45+/-0.07 + Mean accuracy: 0.46+/-0.07 -How to avoid data leakage -------------------------- - -Below are some tips on avoiding data leakage: - -* Always split the data into train and test subsets first, particularly - before any preprocessing steps. -* Never include test data when using the `fit` and `fit_transform` - methods. Using all the data, e.g., `fit(X)`, can result in overly optimistic - scores. - - Conversely, the `transform` method should be used on both train and test - subsets as the same preprocessing should be applied to all the data. - This can be achieved by using `fit_transform` on the train subset and - `transform` on the test subset. -* The scikit-learn :ref:`pipeline ` is a great way to prevent data - leakage as it ensures that the appropriate method is performed on the - correct data subset. The pipeline is ideal for use in cross-validation - and hyper-parameter tuning functions. .. 
_randomness: @@ -243,7 +244,7 @@ Some scikit-learn objects are inherently random. These are usually estimators splitters (e.g. :class:`~sklearn.model_selection.KFold`). The randomness of these objects is controlled via their `random_state` parameter, as described in the :term:`Glossary `. This section expands on the glossary -entry, and describes good practices and common pitfalls w.r.t. to this +entry, and describes good practices and common pitfalls w.r.t. this subtle parameter. .. note:: Recommendation summary @@ -316,7 +317,7 @@ inter-dependent. For example, two estimators that share the same we discuss cloning. This point is important to keep in mind when debugging. If we had passed an integer to the `random_state` parameter of the -:class:`~sklearn.ensemble.RandomForestClassifier`, we would have obtained the +:class:`~sklearn.linear_model.SGDClassifier`, we would have obtained the same models, and thus the same scores each time. When we pass an integer, the same RNG is used across all calls to `fit`. What internally happens is that even though the RNG is consumed when `fit` is called, it is always reset to @@ -413,10 +414,12 @@ it will allow the estimator RNG to vary for each fold. illustration purpose: what matters is what we pass to the :class:`~sklearn.ensemble.RandomForestClassifier` estimator. +|details-start| **Cloning** +|details-split| Another subtle side effect of passing `RandomState` instances is how -:func:`~sklearn.clone` will work:: +:func:`~sklearn.base.clone` will work:: >>> from sklearn import clone >>> from sklearn.ensemble import RandomForestClassifier @@ -439,14 +442,16 @@ If an integer were passed, `a` and `b` would be exact clones and they would not influence each other. .. warning:: - Even though :func:`~sklearn.clone` is rarely used in user code, it is + Even though :func:`~sklearn.base.clone` is rarely used in user code, it is called pervasively throughout scikit-learn codebase: in particular, most meta-estimators that accept non-fitted estimators call - :func:`~sklearn.clone` internally + :func:`~sklearn.base.clone` internally (:class:`~sklearn.model_selection.GridSearchCV`, :class:`~sklearn.ensemble.StackingClassifier`, :class:`~sklearn.calibration.CalibratedClassifierCV`, etc.). +|details-end| + CV splitters ............ @@ -553,7 +558,7 @@ When we evaluate a randomized estimator performance by cross-validation, we want to make sure that the estimator can yield accurate predictions for new data, but we also want to make sure that the estimator is robust w.r.t. its random initialization. For example, we would like the random weights -initialization of a :class:`~sklearn.linear_model.SGDCLassifier` to be +initialization of a :class:`~sklearn.linear_model.SGDClassifier` to be consistently good across all folds: otherwise, when we train that estimator on new data, we might get unlucky and the random initialization may lead to bad performance. Similarly, we want a random forest to be robust w.r.t the diff --git a/doc/communication_team.rst b/doc/communication_team.rst index 2a45e81d8a20a..30e4f1169cfc9 100644 --- a/doc/communication_team.rst +++ b/doc/communication_team.rst @@ -11,6 +11,6 @@

-

francoisgoupil

+

François Goupil

+
-
\ No newline at end of file diff --git a/doc/communication_team_emeritus.rst b/doc/communication_team_emeritus.rst index 8604bf2742473..d5ef7df59238e 100644 --- a/doc/communication_team_emeritus.rst +++ b/doc/communication_team_emeritus.rst @@ -1 +1 @@ -- Reshama Shaikh \ No newline at end of file +- Reshama Shaikh diff --git a/doc/computing/computational_performance.rst b/doc/computing/computational_performance.rst index bb8a130d5f71e..d6864689502c2 100644 --- a/doc/computing/computational_performance.rst +++ b/doc/computing/computational_performance.rst @@ -39,10 +39,11 @@ machine learning toolkit is the latency at which predictions can be made in a production environment. The main factors that influence the prediction latency are - 1. Number of features - 2. Input data representation and sparsity - 3. Model complexity - 4. Feature extraction + +1. Number of features +2. Input data representation and sparsity +3. Model complexity +4. Feature extraction A last major parameter is also the possibility to do predictions in bulk or one-at-a-time mode. @@ -195,7 +196,7 @@ support vectors. .. centered:: |nusvr_model_complexity| For :mod:`sklearn.ensemble` of trees (e.g. RandomForest, GBT, -ExtraTrees etc) the number of trees and their depth play the most +ExtraTrees, etc.) the number of trees and their depth play the most important role. Latency and throughput should scale linearly with the number of trees. In this case we used directly the ``n_estimators`` parameter of :class:`~ensemble.GradientBoostingRegressor`. @@ -224,9 +225,9 @@ files, tokenizing the text and hashing it into a common vector space) is taking 100 to 500 times more time than the actual prediction code, depending on the chosen model. - .. |prediction_time| image:: ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_004.png - :target: ../auto_examples/applications/plot_out_of_core_classification.html - :scale: 80 +.. |prediction_time| image:: ../auto_examples/applications/images/sphx_glr_plot_out_of_core_classification_004.png + :target: ../auto_examples/applications/plot_out_of_core_classification.html + :scale: 80 .. centered:: |prediction_time| @@ -283,14 +284,15 @@ scikit-learn install with the following command:: python -c "import sklearn; sklearn.show_versions()" Optimized BLAS / LAPACK implementations include: - - Atlas (need hardware specific tuning by rebuilding on the target machine) - - OpenBLAS - - MKL - - Apple Accelerate and vecLib frameworks (OSX only) + +- Atlas (need hardware specific tuning by rebuilding on the target machine) +- OpenBLAS +- MKL +- Apple Accelerate and vecLib frameworks (OSX only) More information can be found on the `NumPy install page `_ and in this -`blog post `_ +`blog post `_ from Daniel Nouri which has some nice step by step install instructions for Debian / Ubuntu. @@ -364,5 +366,5 @@ sufficient to not generate the relevant features, leaving their columns empty. Links ...... - - :ref:`scikit-learn developer performance documentation ` - - `Scipy sparse matrix formats documentation `_ +- :ref:`scikit-learn developer performance documentation ` +- `Scipy sparse matrix formats documentation `_ diff --git a/doc/computing/parallelism.rst b/doc/computing/parallelism.rst index 97e3e2866083f..53cef5603c5be 100644 --- a/doc/computing/parallelism.rst +++ b/doc/computing/parallelism.rst @@ -87,15 +87,15 @@ will use as many threads as possible, i.e. as many threads as logical cores. 
You can control the exact number of threads that are used either: - - via the ``OMP_NUM_THREADS`` environment variable, for instance when: - running a python script: +- via the ``OMP_NUM_THREADS`` environment variable, for instance when: + running a python script: - .. prompt:: bash $ + .. prompt:: bash $ - OMP_NUM_THREADS=4 python my_script.py + OMP_NUM_THREADS=4 python my_script.py - - or via `threadpoolctl` as explained by `this piece of documentation - `_. +- or via `threadpoolctl` as explained by `this piece of documentation + `_. Parallel NumPy and SciPy routines from numerical libraries .......................................................... @@ -107,15 +107,15 @@ such as MKL, OpenBLAS or BLIS. You can control the exact number of threads used by BLAS for each library using environment variables, namely: - - ``MKL_NUM_THREADS`` sets the number of thread MKL uses, - - ``OPENBLAS_NUM_THREADS`` sets the number of threads OpenBLAS uses - - ``BLIS_NUM_THREADS`` sets the number of threads BLIS uses +- ``MKL_NUM_THREADS`` sets the number of threads MKL uses, +- ``OPENBLAS_NUM_THREADS`` sets the number of threads OpenBLAS uses +- ``BLIS_NUM_THREADS`` sets the number of threads BLIS uses Note that BLAS & LAPACK implementations can also be impacted by `OMP_NUM_THREADS`. To check whether this is the case in your environment, you can inspect how the number of threads effectively used by those libraries -is affected when running the the following command in a bash or zsh terminal -for different values of `OMP_NUM_THREADS`:: +is affected when running the following command in a bash or zsh terminal +for different values of `OMP_NUM_THREADS`: .. prompt:: bash $ @@ -299,6 +299,13 @@ When this environment variable is set to a non zero value, the `Cython` derivative, `boundscheck` is set to `True`. This is useful for finding segfaults. +`SKLEARN_BUILD_ENABLE_DEBUG_SYMBOLS` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When this environment variable is set to a non zero value, the debug symbols +will be included in the compiled C extensions. Only debug symbols for POSIX +systems are configured. + `SKLEARN_PAIRWISE_DIST_CHUNK_SIZE` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -309,3 +316,29 @@ most machines. Users looking for the best performance might want to tune this variable using powers of 2 so as to get the best parallelism behavior for their hardware, especially with respect to their caches' sizes. + +`SKLEARN_WARNINGS_AS_ERRORS` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This environment variable is used to turn warnings into errors in tests and +documentation build. + +Some CI (Continuous Integration) builds set `SKLEARN_WARNINGS_AS_ERRORS=1`, for +example to make sure that we catch deprecation warnings from our dependencies +and that we adapt our code. + +To locally run with the same "warnings as errors" setting as in these CI builds, +you can set `SKLEARN_WARNINGS_AS_ERRORS=1`. + +By default, warnings are not turned into errors. This is the case if +`SKLEARN_WARNINGS_AS_ERRORS` is unset, or `SKLEARN_WARNINGS_AS_ERRORS=0`. + +This environment variable uses specific warning filters to ignore some warnings, +since sometimes warnings originate from third-party libraries and there is not +much we can do about it. You can see the warning filters in the +`_get_warnings_filters_info_list` function in `sklearn/utils/_testing.py`. + +Note that for the documentation build, `SKLEARN_WARNINGS_AS_ERRORS=1` checks +that the documentation build, in particular running examples, does not produce +any warnings.
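As a minimal sketch (assuming a local development build of scikit-learn with the test and documentation dependencies installed; the exact invocations are illustrative rather than prescribed by the patch), the same setting can be reproduced locally for a test run or a documentation build:

    SKLEARN_WARNINGS_AS_ERRORS=1 pytest --pyargs sklearn
    SKLEARN_WARNINGS_AS_ERRORS=1 make -C doc html-noplot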
This is different from the `-W` `sphinx-build` argument that +catches syntax warnings in the rst files. diff --git a/doc/computing/scaling_strategies.rst b/doc/computing/scaling_strategies.rst index 277d499f4cc13..143643131b0e8 100644 --- a/doc/computing/scaling_strategies.rst +++ b/doc/computing/scaling_strategies.rst @@ -20,9 +20,9 @@ data that cannot fit in a computer's main memory (RAM). Here is a sketch of a system designed to achieve this goal: - 1. a way to stream instances - 2. a way to extract features from instances - 3. an incremental algorithm +1. a way to stream instances +2. a way to extract features from instances +3. an incremental algorithm Streaming instances .................... @@ -62,29 +62,29 @@ balances relevancy and memory footprint could involve some tuning [1]_. Here is a list of incremental estimators for different tasks: - - Classification - + :class:`sklearn.naive_bayes.MultinomialNB` - + :class:`sklearn.naive_bayes.BernoulliNB` - + :class:`sklearn.linear_model.Perceptron` - + :class:`sklearn.linear_model.SGDClassifier` - + :class:`sklearn.linear_model.PassiveAggressiveClassifier` - + :class:`sklearn.neural_network.MLPClassifier` - - Regression - + :class:`sklearn.linear_model.SGDRegressor` - + :class:`sklearn.linear_model.PassiveAggressiveRegressor` - + :class:`sklearn.neural_network.MLPRegressor` - - Clustering - + :class:`sklearn.cluster.MiniBatchKMeans` - + :class:`sklearn.cluster.Birch` - - Decomposition / feature Extraction - + :class:`sklearn.decomposition.MiniBatchDictionaryLearning` - + :class:`sklearn.decomposition.IncrementalPCA` - + :class:`sklearn.decomposition.LatentDirichletAllocation` - + :class:`sklearn.decomposition.MiniBatchNMF` - - Preprocessing - + :class:`sklearn.preprocessing.StandardScaler` - + :class:`sklearn.preprocessing.MinMaxScaler` - + :class:`sklearn.preprocessing.MaxAbsScaler` +- Classification + + :class:`sklearn.naive_bayes.MultinomialNB` + + :class:`sklearn.naive_bayes.BernoulliNB` + + :class:`sklearn.linear_model.Perceptron` + + :class:`sklearn.linear_model.SGDClassifier` + + :class:`sklearn.linear_model.PassiveAggressiveClassifier` + + :class:`sklearn.neural_network.MLPClassifier` +- Regression + + :class:`sklearn.linear_model.SGDRegressor` + + :class:`sklearn.linear_model.PassiveAggressiveRegressor` + + :class:`sklearn.neural_network.MLPRegressor` +- Clustering + + :class:`sklearn.cluster.MiniBatchKMeans` + + :class:`sklearn.cluster.Birch` +- Decomposition / feature Extraction + + :class:`sklearn.decomposition.MiniBatchDictionaryLearning` + + :class:`sklearn.decomposition.IncrementalPCA` + + :class:`sklearn.decomposition.LatentDirichletAllocation` + + :class:`sklearn.decomposition.MiniBatchNMF` +- Preprocessing + + :class:`sklearn.preprocessing.StandardScaler` + + :class:`sklearn.preprocessing.MinMaxScaler` + + :class:`sklearn.preprocessing.MaxAbsScaler` For classification, a somewhat important thing to note is that although a stateless feature extraction routine may be able to cope with new/unseen diff --git a/doc/conf.py b/doc/conf.py index 25f2a9eab6007..0587e98130118 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,14 +10,16 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import sys import os -import warnings import re +import sys +import warnings from datetime import datetime -from sklearn.externals._packaging.version import parse -from pathlib import Path from io import StringIO +from pathlib import Path + +from sklearn.externals._packaging.version import parse +from sklearn.utils._testing import turn_warnings_into_errors # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory @@ -25,8 +27,9 @@ # absolute, like shown here. sys.path.insert(0, os.path.abspath("sphinxext")) -from github_link import make_linkcode_resolve import sphinx_gallery +from github_link import make_linkcode_resolve +from sphinx_gallery.notebook import add_code_cell, add_markdown_cell from sphinx_gallery.sorting import ExampleTitleSortKey try: @@ -56,12 +59,32 @@ "sphinx_issues", "add_toctree_functions", "sphinx-prompt", + "sphinx_copybutton", "sphinxext.opengraph", "doi_role", "allow_nan_estimators", "matplotlib.sphinxext.plot_directive", ] +# Specify how to identify the prompt when copying code snippets +copybutton_prompt_text = r">>> |\.\.\. " +copybutton_prompt_is_regexp = True +copybutton_exclude = "style" + +try: + import jupyterlite_sphinx # noqa: F401 + + extensions.append("jupyterlite_sphinx") + with_jupyterlite = True +except ImportError: + # In some cases we don't want to require jupyterlite_sphinx to be installed, + # e.g. the doc-min-dependencies build + warnings.warn( + "jupyterlite_sphinx is not installed, you need to install it " + "if you want JupyterLite links to appear in each example" + ) + with_jupyterlite = False + # Produce `plot::` directives for examples that contain `import matplotlib` or # `from matplotlib import`. numpydoc_use_plots = True @@ -171,7 +194,8 @@ # further. For a list of options available for each theme, see the # documentation. 
html_theme_options = { - "google_analytics": True, + "legacy_google_analytics": True, + "analytics": True, "mathjax_path": mathjax_path, "link_to_live_contributing_page": not parsed_version.is_devrelease, } @@ -248,9 +272,9 @@ -1 ] latest_highlights = latest_highlights.with_suffix("").name -html_context[ - "release_highlights" -] = f"auto_examples/release_highlights/{latest_highlights}" +html_context["release_highlights"] = ( + f"auto_examples/release_highlights/{latest_highlights}" +) # get version from highlight name assuming highlights have the form # plot_release_highlights_0_22_0 @@ -268,11 +292,24 @@ "auto_examples/linear_model/plot_bayesian_ridge": ( "auto_examples/linear_model/plot_ard" ), - "examples/model_selection/grid_search_text_feature_extraction.py": ( - "examples/model_selection/plot_grid_search_text_feature_extraction.py" + "auto_examples/model_selection/grid_search_text_feature_extraction.py": ( + "auto_examples/model_selection/plot_grid_search_text_feature_extraction.py" + ), + "auto_examples/miscellaneous/plot_changed_only_pprint_parameter": ( + "auto_examples/miscellaneous/plot_estimator_representation" ), - "examples/miscellaneous/plot_changed_only_pprint_parameter": ( - "examples/miscellaneous/plot_estimator_representation" + "auto_examples/decomposition/plot_beta_divergence": ( + "auto_examples/applications/plot_topics_extraction_with_nmf_lda" + ), + "auto_examples/svm/plot_svm_nonlinear": "auto_examples/svm/plot_svm_kernels", + "auto_examples/ensemble/plot_adaboost_hastie_10_2": ( + "auto_examples/ensemble/plot_adaboost_multiclass" + ), + "auto_examples/decomposition/plot_pca_3d": ( + "auto_examples/decomposition/plot_pca_iris" + ), + "auto_examples/exercises/plot_cv_digits.py": ( + "auto_examples/model_selection/plot_nested_cross_validation_iris.py" ), } html_context["redirects"] = redirects @@ -280,7 +317,30 @@ html_additional_pages[old_link] = "redirects.html" # Not showing the search summary makes the search page load faster. -html_show_search_summary = False +html_show_search_summary = True + + +# The "summary-anchor" IDs will be overwritten via JavaScript to be unique. +# See `doc/theme/scikit-learn-modern/static/js/details-permalink.js`. +rst_prolog = """ +.. |details-start| raw:: html + +
+ + +.. |details-split| raw:: html + + Click for more details + + +
+ +.. |details-end| raw:: html + +
+
+ +""" # -- Options for LaTeX output ------------------------------------------------ latex_elements = { @@ -331,6 +391,7 @@ "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "joblib": ("https://joblib.readthedocs.io/en/latest/", None), "seaborn": ("https://seaborn.pydata.org/", None), + "skops": ("https://skops.readthedocs.io/en/stable/", None), } v = parse(release) @@ -389,7 +450,7 @@ def __call__(self, filename): prefix = "plot_release_highlights_" # Use title to sort if not a release highlight - if not filename.startswith(prefix): + if not str(filename).startswith(prefix): return title major_minor = filename[len(prefix) :].split("_")[:2] @@ -399,6 +460,74 @@ def __call__(self, filename): return -version_float +def notebook_modification_function(notebook_content, notebook_filename): + notebook_content_str = str(notebook_content) + warning_template = "\n".join( + [ + "
", + "", + "# JupyterLite warning", + "", + "{message}", + "
", + ] + ) + + message_class = "warning" + message = ( + "Running the scikit-learn examples in JupyterLite is experimental and you may" + " encounter some unexpected behavior.\n\nThe main difference is that imports" + " will take a lot longer than usual, for example the first `import sklearn` can" + " take roughly 10-20s.\n\nIf you notice problems, feel free to open an" + " [issue](https://github.com/scikit-learn/scikit-learn/issues/new/choose)" + " about it." + ) + + markdown = warning_template.format(message_class=message_class, message=message) + + dummy_notebook_content = {"cells": []} + add_markdown_cell(dummy_notebook_content, markdown) + + code_lines = [] + + if "seaborn" in notebook_content_str: + code_lines.append("%pip install seaborn") + if "plotly.express" in notebook_content_str: + code_lines.append("%pip install plotly") + if "skimage" in notebook_content_str: + code_lines.append("%pip install scikit-image") + if "polars" in notebook_content_str: + code_lines.append("%pip install polars") + if "fetch_" in notebook_content_str: + code_lines.extend( + [ + "%pip install pyodide-http", + "import pyodide_http", + "pyodide_http.patch_all()", + ] + ) + # always import matplotlib and pandas to avoid Pyodide limitation with + # imports inside functions + code_lines.extend(["import matplotlib", "import pandas"]) + + if code_lines: + code_lines = ["# JupyterLite-specific code"] + code_lines + code = "\n".join(code_lines) + add_code_cell(dummy_notebook_content, code) + + notebook_content["cells"] = ( + dummy_notebook_content["cells"] + notebook_content["cells"] + ) + + +default_global_config = sklearn.get_config() + + +def reset_sklearn_config(gallery_conf, fname): + """Reset sklearn config to default values.""" + sklearn.set_config(**default_global_config) + + sphinx_gallery_conf = { "doc_module": "sklearn", "backreferences_dir": os.path.join("modules", "generated"), @@ -420,7 +549,13 @@ def __call__(self, filename): "inspect_global_variables": False, "remove_config_comments": True, "plot_gallery": "True", + "recommender": {"enable": True, "n_examples": 5, "min_df": 12}, + "reset_modules": ("matplotlib", "seaborn", reset_sklearn_config), } +if with_jupyterlite: + sphinx_gallery_conf["jupyterlite"] = { + "notebook_modification_function": notebook_modification_function + } # The following dictionary contains the information used to create the @@ -564,9 +699,11 @@ def setup(app): # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve( "sklearn", - "https://github.com/scikit-learn/" - "scikit-learn/blob/{revision}/" - "{package}/{path}#L{lineno}", + ( + "https://github.com/scikit-learn/" + "scikit-learn/blob/{revision}/" + "{package}/{path}#L{lineno}" + ), ) warnings.filterwarnings( @@ -577,7 +714,8 @@ def setup(app): " non-GUI backend, so cannot show the figure." 
), ) - +if os.environ.get("SKLEARN_WARNINGS_AS_ERRORS", "0") != "0": + turn_warnings_into_errors() # maps functions with a class name that is indistinguishable when case is # ignore to another filename @@ -612,20 +750,32 @@ def setup(app): # ignore links to specific pdf pages because linkcheck does not handle them # ('utf-8' codec can't decode byte error) r"http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=.*", - "https://www.fordfoundation.org/media/2976/" - "roads-and-bridges-the-unseen-labor-behind-our-digital-infrastructure.pdf#page=.*", + ( + "https://www.fordfoundation.org/media/2976/roads-and-bridges" + "-the-unseen-labor-behind-our-digital-infrastructure.pdf#page=.*" + ), # links falsely flagged as broken - "https://www.researchgate.net/publication/" - "233096619_A_Dendrite_Method_for_Cluster_Analysis", - "https://www.researchgate.net/publication/221114584_Random_Fourier_Approximations_" - "for_Skewed_Multiplicative_Histogram_Kernels", - "https://www.researchgate.net/publication/4974606_" - "Hedonic_housing_prices_and_the_demand_for_clean_air", - "https://www.researchgate.net/profile/Anh-Huy-Phan/publication/220241471_Fast_" - "Local_Algorithms_for_Large_Scale_Nonnegative_Matrix_and_Tensor_Factorizations", + ( + "https://www.researchgate.net/publication/" + "233096619_A_Dendrite_Method_for_Cluster_Analysis" + ), + ( + "https://www.researchgate.net/publication/221114584_Random_Fourier" + "_Approximations_for_Skewed_Multiplicative_Histogram_Kernels" + ), + ( + "https://www.researchgate.net/publication/4974606_" + "Hedonic_housing_prices_and_the_demand_for_clean_air" + ), + ( + "https://www.researchgate.net/profile/Anh-Huy-Phan/publication/220241471_Fast_" + "Local_Algorithms_for_Large_Scale_Nonnegative_Matrix_and_Tensor_Factorizations" + ), "https://doi.org/10.13140/RG.2.2.35280.02565", - "https://www.microsoft.com/en-us/research/uploads/prod/2006/01/" - "Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf", + ( + "https://www.microsoft.com/en-us/research/uploads/prod/2006/01/" + "Bishop-Pattern-Recognition-and-Machine-Learning-2006.pdf" + ), "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-99-87.pdf", "https://microsoft.com/", "https://www.jstor.org/stable/2984099", @@ -640,6 +790,8 @@ def setup(app): # https://github.com/sphinx-doc/sphinx/issues/9016 for more details about # the github example r"https://github.com/conda-forge/miniforge#miniforge", + r"https://github.com/joblib/threadpoolctl/" + "#setting-the-maximum-size-of-thread-pools", r"https://stackoverflow.com/questions/5836335/" "consistently-create-same-random-numpy-array/5837352#comment6712034_5837352", ] diff --git a/doc/conftest.py b/doc/conftest.py index ab68b2f4bc7c5..d66148ccc553f 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -1,16 +1,16 @@ import os -from os.path import exists -from os.path import join -from os import environ import warnings +from os import environ +from os.path import exists, join + +import pytest +from _pytest.doctest import DoctestItem -from sklearn.utils import IS_PYPY -from sklearn.utils._testing import SkipTest -from sklearn.utils._testing import check_skip_network -from sklearn.utils.fixes import parse_version from sklearn.datasets import get_data_home from sklearn.datasets._base import _pkl_filepath from sklearn.datasets._twenty_newsgroups import CACHE_NAME +from sklearn.utils._testing import SkipTest, check_skip_network +from sklearn.utils.fixes import _IS_PYPY, np_base_version, parse_version def setup_labeled_faces(): @@ -34,7 +34,7 
@@ def setup_twenty_newsgroups(): def setup_working_with_text_data(): - if IS_PYPY and os.environ.get("CI", None): + if _IS_PYPY and os.environ.get("CI", None): raise SkipTest("Skipping too slow test with PyPy on CI") check_skip_network() cache_path = _pkl_filepath(get_data_home(), CACHE_NAME) @@ -167,3 +167,34 @@ def pytest_configure(config): matplotlib.use("agg") except ImportError: pass + + +def pytest_collection_modifyitems(config, items): + """Called after collect is completed. + + Parameters + ---------- + config : pytest config + items : list of collected items + """ + skip_doctests = False + if np_base_version >= parse_version("2"): + # Skip doctests when using numpy 2 for now. See the following discussion + # to decide what to do in the longer term: + # https://github.com/scikit-learn/scikit-learn/issues/27339 + reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" + skip_doctests = True + + # Normally doctest has the entire module's scope. Here we set globs to an empty dict + # to remove the module's scope: + # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context + for item in items: + if isinstance(item, DoctestItem): + item.dtest.globs = {} + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + + for item in items: + if isinstance(item, DoctestItem): + item.add_marker(skip_marker) diff --git a/doc/contributor_experience_team.rst b/doc/contributor_experience_team.rst index 20a45f541ec99..7d942a07e6a7d 100644 --- a/doc/contributor_experience_team.rst +++ b/doc/contributor_experience_team.rst @@ -10,10 +10,6 @@

Juan Carlos Alfaro Jiménez

-
-

Arturo Amor

-
-

Lucy Liu

@@ -30,10 +26,6 @@

Sylvain Marié

-
-

Chiara Marmo

-
-

Norbert Preining

@@ -46,7 +38,7 @@

Albert Thomas

-
-

Tim Head

+
+

Maren Westermann

diff --git a/doc/contributor_experience_team_emeritus.rst b/doc/contributor_experience_team_emeritus.rst new file mode 100644 index 0000000000000..a833907dd5e4a --- /dev/null +++ b/doc/contributor_experience_team_emeritus.rst @@ -0,0 +1 @@ +- Chiara Marmo diff --git a/doc/datasets/loading_other_datasets.rst b/doc/datasets/loading_other_datasets.rst index a376a69f26dc3..fdd7fd1666cce 100644 --- a/doc/datasets/loading_other_datasets.rst +++ b/doc/datasets/loading_other_datasets.rst @@ -99,7 +99,7 @@ from the repository using the function For example, to download a dataset of gene expressions in mice brains:: >>> from sklearn.datasets import fetch_openml - >>> mice = fetch_openml(name='miceprotein', version=4, parser="auto") + >>> mice = fetch_openml(name='miceprotein', version=4) To fully specify a dataset, you need to provide a name and a version, though the version is optional, see :ref:`openml_versions` below. @@ -147,7 +147,7 @@ dataset on the openml website:: The ``data_id`` also uniquely identifies a dataset from OpenML:: - >>> mice = fetch_openml(data_id=40966, parser="auto") + >>> mice = fetch_openml(data_id=40966) >>> mice.details # doctest: +SKIP {'id': '4550', 'name': 'MiceProtein', 'version': '1', 'format': 'ARFF', 'creator': ..., @@ -171,7 +171,7 @@ which can contain entirely different datasets. If a particular version of a dataset has been found to contain significant issues, it might be deactivated. Using a name to specify a dataset will yield the earliest version of a dataset that is still active. That means that -``fetch_openml(name="miceprotein", parser="auto")`` can yield different results +``fetch_openml(name="miceprotein")`` can yield different results at different times if earlier versions become inactive. You can see that the dataset with ``data_id`` 40966 that we fetched above is the first version of the "miceprotein" dataset:: @@ -182,19 +182,19 @@ the first version of the "miceprotein" dataset:: In fact, this dataset only has one version. 
The iris dataset on the other hand has multiple versions:: - >>> iris = fetch_openml(name="iris", parser="auto") + >>> iris = fetch_openml(name="iris") >>> iris.details['version'] #doctest: +SKIP '1' >>> iris.details['id'] #doctest: +SKIP '61' - >>> iris_61 = fetch_openml(data_id=61, parser="auto") + >>> iris_61 = fetch_openml(data_id=61) >>> iris_61.details['version'] '1' >>> iris_61.details['id'] '61' - >>> iris_969 = fetch_openml(data_id=969, parser="auto") + >>> iris_969 = fetch_openml(data_id=969) >>> iris_969.details['version'] '3' >>> iris_969.details['id'] @@ -212,7 +212,7 @@ binarized version of the data:: You can also specify both the name and the version, which also uniquely identifies the dataset:: - >>> iris_version_3 = fetch_openml(name="iris", version=3, parser="auto") + >>> iris_version_3 = fetch_openml(name="iris", version=3) >>> iris_version_3.details['version'] '3' >>> iris_version_3.details['id'] @@ -290,9 +290,9 @@ format usable by scikit-learn: context such as .mat and .arff * `numpy/routines.io `_ for standard loading of columnar data into numpy arrays -* scikit-learn's :func:`datasets.load_svmlight_file` for the svmlight or libSVM +* scikit-learn's :func:`load_svmlight_file` for the svmlight or libSVM sparse format -* scikit-learn's :func:`datasets.load_files` for directories of text files where +* scikit-learn's :func:`load_files` for directories of text files where the name of each directory is the name of each category and each file inside of each directory corresponds to one sample from that category diff --git a/doc/datasets/real_world.rst b/doc/datasets/real_world.rst index b528a26674db9..78b09e6f722b0 100644 --- a/doc/datasets/real_world.rst +++ b/doc/datasets/real_world.rst @@ -25,6 +25,7 @@ They can be loaded using the following functions: fetch_rcv1 fetch_kddcup99 fetch_california_housing + fetch_species_distributions .. include:: ../../sklearn/datasets/descr/olivetti_faces.rst @@ -39,3 +40,5 @@ They can be loaded using the following functions: .. include:: ../../sklearn/datasets/descr/kddcup99.rst .. include:: ../../sklearn/datasets/descr/california_housing.rst + +.. include:: ../../sklearn/datasets/descr/species_distributions.rst diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index 912d52802d456..ed25d30601e45 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -26,11 +26,12 @@ Installing a nightly build is the quickest way to: - check whether a bug you encountered has been fixed since the last release. -You can install the nightly build of scikit-learn using the `scipy-wheels-nightly` +You can install the nightly build of scikit-learn using the `scientific-python-nightly-wheels` index from the PyPI registry of `anaconda.org`: + .. prompt:: bash $ - pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn + pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scikit-learn Note that first uninstalling scikit-learn might be required to be able to install nightly builds of scikit-learn. @@ -63,33 +64,42 @@ feature, code or documentation improvement). If you installed Python with conda, we recommend to create a dedicated `conda environment`_ with all the build dependencies of scikit-learn - (namely NumPy_, SciPy_, and Cython_): + (namely NumPy_, SciPy_, Cython_, meson-python_ and Ninja_): + + .. 
prompt:: bash $ + + conda create -n sklearn-env -c conda-forge python=3.9 numpy scipy cython meson-python ninja + + It is not always necessary but it is safer to open a new prompt before + activating the newly created conda environment. .. prompt:: bash $ - conda create -n sklearn-env -c conda-forge python=3.9 numpy scipy cython conda activate sklearn-env -#. **Alternative to conda:** If you run Linux or similar, you can instead use - your system's Python provided it is recent enough (3.8 or higher - at the time of writing). In this case, we recommend to create a dedicated - virtualenv_ and install the scikit-learn build dependencies with pip: +#. **Alternative to conda:** You can use alternative installations of Python + provided they are recent enough (3.9 or higher at the time of writing). + Here is an example on how to create a build environment for a Linux system's + Python. Build dependencies are installed with `pip` in a dedicated virtualenv_ + to avoid disrupting other Python programs installed on the system: .. prompt:: bash $ python3 -m venv sklearn-env source sklearn-env/bin/activate - pip install wheel numpy scipy cython + pip install wheel numpy scipy cython meson-python ninja #. Install a compiler with OpenMP_ support for your platform. See instructions for :ref:`compiler_windows`, :ref:`compiler_macos`, :ref:`compiler_linux` and :ref:`compiler_freebsd`. -#. Build the project with pip in :ref:`editable_mode`: +#. Build the project with pip: .. prompt:: bash $ - pip install --verbose --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true #. Check that the installed scikit-learn has a version number ending with `.dev0`: @@ -103,11 +113,14 @@ feature, code or documentation improvement). .. note:: - You will have to run the ``pip install --no-build-isolation --editable .`` - command every time the source code of a Cython file is updated - (ending in `.pyx` or `.pxd`). Use the ``--no-build-isolation`` flag to - avoid compiling the whole project each time, only the files you have - modified. + `--config-settings editable-verbose=true` is optional but recommended + to avoid surprises when you import `sklearn`. `meson-python` implements + editable installs by rebuilding `sklearn` when executing `import sklearn`. + With the recommended setting you will see a message when this happens, + rather than potentially waiting without feed-back and wondering + what is taking so long. Bonus: this means you only have to run the `pip + install` command once, `sklearn` will automatically be rebuilt when + importing `sklearn`. Dependencies ------------ @@ -171,26 +184,6 @@ If you want to build a stable version, you can ``git checkout `` to get the code for that particular version, or download an zip archive of the version from github. -.. _editable_mode: - -Editable mode -------------- - -If you run the development version, it is cumbersome to reinstall the package -each time you update the sources. Therefore it is recommended that you install -in with the ``pip install --no-build-isolation --editable .`` command, which -allows you to edit the code in-place. This builds the extension in place and -creates a link to the development directory (see `the pip docs -`_). - -As the doc aboves explains, this is fundamentally similar to using the command -``python setup.py develop``. (see `the setuptool docs -`_). -It is however preferred to use pip. 
- -On Unix-like systems, you can equivalently type ``make in`` from the top-level -folder. Have a look at the ``Makefile`` for additional utilities. - .. _platform_specific_instructions: Platform-specific instructions @@ -225,10 +218,13 @@ console: For 64-bit Python, configure the build environment by running the following commands in ``cmd`` or an Anaconda Prompt (if you use Anaconda): - :: +.. sphinx-prompt 1.3.0 (used in doc-min-dependencies CI task) does not support `batch` prompt type, +.. so we work around by using a known prompt type and an explicit prompt text. +.. +.. prompt:: bash C:\> - $ SET DISTUTILS_USE_SDK=1 - $ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + SET DISTUTILS_USE_SDK=1 + "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 Replace ``x64`` by ``x86`` to build for 32-bit Python. @@ -236,11 +232,13 @@ Please be aware that the path above might be different from user to user. The aim is to point to the "vcvarsall.bat" file that will set the necessary environment variables in the current command prompt. -Finally, build scikit-learn from this command prompt: +Finally, build scikit-learn with this command prompt: .. prompt:: bash $ - pip install --verbose --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. _compiler_macos: @@ -279,10 +277,18 @@ scikit-learn from source: .. prompt:: bash $ conda create -n sklearn-dev -c conda-forge python numpy scipy cython \ - joblib threadpoolctl pytest compilers llvm-openmp + joblib threadpoolctl pytest compilers llvm-openmp meson-python ninja + +It is not always necessary but it is safer to open a new prompt before +activating the newly created conda environment. + +.. prompt:: bash $ + conda activate sklearn-dev make clean - pip install --verbose --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. note:: @@ -300,12 +306,6 @@ forge using the following command: which should include ``compilers`` and ``llvm-openmp``. -.. note:: - - If you installed these packages after creating and activating a new conda - environment, you will need to first deactivate and then reactivate the - environment for these changes to take effect. - The compilers meta-package will automatically set custom environment variables: @@ -362,7 +362,9 @@ Finally, build scikit-learn in verbose mode (to check for the presence of the .. prompt:: bash $ make clean - pip install --verbose --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. _compiler_linux: @@ -388,7 +390,9 @@ then proceed as usual: .. prompt:: bash $ pip3 install cython - pip3 install --verbose --editable . + pip3 install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true Cython and the pre-compiled wheels for the runtime dependencies (numpy, scipy and joblib) should automatically be installed in @@ -420,9 +424,17 @@ in the user folder using conda: .. prompt:: bash $ conda create -n sklearn-dev -c conda-forge python numpy scipy cython \ - joblib threadpoolctl pytest compilers + joblib threadpoolctl pytest compilers meson-python ninja + +It is not always necessary but it is safer to open a new prompt before +activating the newly created conda environment. + +.. 
prompt:: bash $ + conda activate sklearn-dev - pip install --verbose --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true .. _compiler_freebsd: @@ -451,13 +463,17 @@ Finally, build the package using the standard command: .. prompt:: bash $ - pip install --verbose --no-build-isolation --editable . + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true For the upcoming FreeBSD 12.1 and 11.3 versions, OpenMP will be included in the base system and these steps will not be necessary. .. _OpenMP: https://en.wikipedia.org/wiki/OpenMP .. _Cython: https://cython.org +.. _meson-python: https://mesonbuild.com/meson-python +.. _Ninja: https://ninja-build.org/ .. _NumPy: https://numpy.org .. _SciPy: https://www.scipy.org .. _Homebrew: https://brew.sh @@ -465,16 +481,43 @@ the base system and these steps will not be necessary. .. _conda environment: https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html .. _Miniforge3: https://github.com/conda-forge/miniforge#miniforge3 -Parallel builds -=============== +Alternative compilers +===================== + +The following command will build scikit-learn using your default C/C++ compiler. + +.. prompt:: bash $ + + pip install --editable . \ + --verbose --no-build-isolation \ + --config-settings editable-verbose=true + +If you want to build scikit-learn with another compiler handled by ``setuptools``, +use the following command: + +.. prompt:: bash $ + + python setup.py build_ext --compiler= -i build_clib --compiler= + +To see the list of available compilers run: + +.. prompt:: bash $ + + python setup.py build_ext --help-compiler + +If your compiler is not listed here, you can specify it through some environment +variables (does not work on windows). This `section +`_ +of the setuptools documentation explains in details which environment variables +are used by ``setuptools``, and at which stage of the compilation, to set the +compiler and linker options. -It is possible to build scikit-learn compiled extensions in parallel by setting -and environment variable as follows before calling the ``pip install`` or -``python setup.py build_ext`` commands:: +When setting these environment variables, it is advised to first check their +``sysconfig`` counterparts variables and adapt them to your compiler. For instance:: - export SKLEARN_BUILD_PARALLEL=3 - pip install --verbose --no-build-isolation --editable . + import sysconfig + print(sysconfig.get_config_var('CC')) + print(sysconfig.get_config_var('LDFLAGS')) -On a machine with 2 CPU cores, it can be beneficial to use a parallelism level -of 3 to overlap IO bound tasks (reading and writing files on disk) with CPU -bound tasks (actually compiling). +In addition, since Scikit-learn uses OpenMP, you need to include the appropriate OpenMP +flag of your compiler into the ``CFLAGS`` and ``CPPFLAGS`` environment variables. diff --git a/doc/developers/bug_triaging.rst b/doc/developers/bug_triaging.rst index 80a0a74c1f3e5..915ea0a9a22b7 100644 --- a/doc/developers/bug_triaging.rst +++ b/doc/developers/bug_triaging.rst @@ -19,18 +19,18 @@ A third party can give useful feedback or even add comments on the issue. 
The following actions are typically useful: - - documenting issues that are missing elements to reproduce the problem - such as code samples +- documenting issues that are missing elements to reproduce the problem + such as code samples - - suggesting better use of code formatting +- suggesting better use of code formatting - - suggesting to reformulate the title and description to make them more - explicit about the problem to be solved +- suggesting to reformulate the title and description to make them more + explicit about the problem to be solved - - linking to related issues or discussions while briefly describing how - they are related, for instance "See also #xyz for a similar attempt - at this" or "See also #xyz where the same thing happened in - SomeEstimator" provides context and helps the discussion. +- linking to related issues or discussions while briefly describing how + they are related, for instance "See also #xyz for a similar attempt + at this" or "See also #xyz where the same thing happened in + SomeEstimator" provides context and helps the discussion. .. topic:: Fruitful discussions @@ -40,7 +40,7 @@ The following actions are typically useful: Overall, it is useful to stay positive and assume good will. `The following article - `_ + `_ explores how to lead online discussions in the context of open source. Working on PRs to help review diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 560e271ee833a..9f43d8ed52c38 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -82,7 +82,9 @@ or changes to dependencies or supported versions, it must be backed by a using the `SLEP template `_ and follows the decision-making process outlined in :ref:`governance`. -.. topic:: Contributing to related projects +|details-start| +**Contributing to related projects** +|details-split| Scikit-learn thrives in an ecosystem of several related projects, which also may have relevant issues to work on, including smaller projects such as: @@ -104,6 +106,7 @@ and follows the decision-making process outlined in :ref:`governance`. Helping these projects may help Scikit-learn too. See also :ref:`related_projects`. +|details-end| Submitting a bug report or a feature request ============================================ @@ -126,7 +129,7 @@ following rules before submitting: - If you are submitting an algorithm or feature request, please verify that the algorithm fulfills our `new algorithm requirements - `_. + `_. - If you are submitting a bug report, we strongly encourage you to follow the guidelines in :ref:`filing_bugs`. @@ -247,18 +250,18 @@ how to set up your git repository: git clone git@github.com:YourLogin/scikit-learn.git # add --depth 1 if your connection is slow cd scikit-learn -3. Follow steps 2-7 in :ref:`install_bleeding_edge` to build scikit-learn in +4. Follow steps 2-6 in :ref:`install_bleeding_edge` to build scikit-learn in development mode and return to this document. -4. Install the development dependencies: +5. Install the development dependencies: .. prompt:: bash $ - pip install pytest pytest-cov flake8 mypy numpydoc black==22.3.0 + pip install pytest pytest-cov ruff mypy numpydoc black==24.3.0 .. _upstream: -5. Add the ``upstream`` remote. This saves a reference to the main +6. Add the ``upstream`` remote. 
This saves a reference to the main scikit-learn repository, which you can use to keep your repository synchronized with the latest changes: @@ -266,7 +269,7 @@ how to set up your git repository: git remote add upstream git@github.com:scikit-learn/scikit-learn.git -6. Check that the `upstream` and `origin` remote aliases are configured correctly +7. Check that the `upstream` and `origin` remote aliases are configured correctly by running `git remote -v` which should display:: origin git@github.com:YourLogin/scikit-learn.git (fetch) @@ -274,11 +277,13 @@ how to set up your git repository: upstream git@github.com:scikit-learn/scikit-learn.git (fetch) upstream git@github.com:scikit-learn/scikit-learn.git (push) -You should now have a working installation of scikit-learn, and your git -repository properly configured. The next steps now describe the process of -modifying code and submitting a PR: +You should now have a working installation of scikit-learn, and your git repository +properly configured. It could be useful to run some test to verify your installation. +Please refer to :ref:`pytest_tips` for examples. -7. Synchronize your ``main`` branch with the ``upstream/main`` branch, +The next steps now describe the process of modifying code and submitting a PR: + +8. Synchronize your ``main`` branch with the ``upstream/main`` branch, more details on `GitHub Docs `_: .. prompt:: bash $ @@ -287,27 +292,27 @@ modifying code and submitting a PR: git fetch upstream git merge upstream/main -8. Create a feature branch to hold your development changes: +9. Create a feature branch to hold your development changes: - .. prompt:: bash $ + .. prompt:: bash $ git checkout -b my_feature and start making changes. Always use a feature branch. It's good practice to never work on the ``main`` branch! -9. (**Optional**) Install `pre-commit `_ to - run code style checks before each commit: +10. (**Optional**) Install `pre-commit `_ to + run code style checks before each commit: - .. prompt:: bash $ + .. prompt:: bash $ - pip install pre-commit - pre-commit install + pip install pre-commit + pre-commit install - pre-commit checks can be disabled for a particular commit with - `git commit -n`. + pre-commit checks can be disabled for a particular commit with + `git commit -n`. -10. Develop the feature on your feature branch on your computer, using Git to +11. Develop the feature on your feature branch on your computer, using Git to do the version control. When you're done editing, add changed files using ``git add`` and then ``git commit``: @@ -323,24 +328,12 @@ modifying code and submitting a PR: git push -u origin my_feature -11. Follow `these +12. Follow `these `_ instructions to create a pull request from your fork. This will send an email to the committers. You may want to consider sending an email to the mailing list for more visibility. -.. note:: - - If you are modifying a Cython module, you have to re-compile after - modifications and before testing them: - - .. prompt:: bash $ - - pip install --no-build-isolation -e . - - Use the ``--no-build-isolation`` flag to avoid compiling the whole project - each time, only the files you have modified. - It is often helpful to keep your local feature branch synchronized with the latest changes of the main scikit-learn repository: @@ -425,30 +418,15 @@ complies with the following rules before marking a PR as ``[MRG]``. The non-regression tests should fail for the code base in the ``main`` branch and pass for the PR code. -5. Run `black` to auto-format your code. - .. 
prompt:: bash $ +5. Follow the :ref:`coding-guidelines`. - black . - See black's - `editor integration documentation `_ - to configure your editor to run `black`. - -6. Run `flake8` to make sure you followed the project coding conventions. - - .. prompt:: bash $ - - flake8 . - -7. Follow the :ref:`coding-guidelines`. - - -8. When applicable, use the validation tools and scripts in the +6. When applicable, use the validation tools and scripts in the ``sklearn.utils`` submodule. A list of utility routines available for developers can be found in the :ref:`developers-utils` page. -9. Often pull requests resolve one or more other issues (or pull requests). +7. Often pull requests resolve one or more other issues (or pull requests). If merging your pull request means that some other issues/PRs should be closed, you should `use keywords to create link to them `_ @@ -458,7 +436,7 @@ complies with the following rules before marking a PR as ``[MRG]``. The related to some other issues/PRs, create a link to them without using the keywords (e.g., ``See also #1234``). -10. PRs should often substantiate the change, through benchmarks of +8. PRs should often substantiate the change, through benchmarks of performance and efficiency (see :ref:`monitoring_performances`) or through examples of usage. Examples also illustrate the features and intricacies of the library to users. Have a look at other examples in the `examples/ @@ -467,14 +445,14 @@ complies with the following rules before marking a PR as ``[MRG]``. The functionality is useful in practice and, if possible, compare it to other methods available in scikit-learn. -11. New features have some maintenance overhead. We expect PR authors +9. New features have some maintenance overhead. We expect PR authors to take part in the maintenance for the code they submit, at least initially. New features need to be illustrated with narrative documentation in the user guide, with small code snippets. If relevant, please also add references in the literature, with PDF links when possible. -12. The user guide should also include expected time and space complexity +10. The user guide should also include expected time and space complexity of the algorithm and scalability, e.g. "this algorithm can scale to a large number of samples > 100000, but does not scale in dimensionality: n_features is expected to be lower than 100". @@ -534,27 +512,33 @@ Continuous Integration (CI) * Azure pipelines are used for testing scikit-learn on Linux, Mac and Windows, with different dependencies and settings. -* CircleCI is used to build the docs for viewing, for linting with flake8, and - for testing with ARM64 / aarch64 on Linux +* CircleCI is used to build the docs for viewing. +* Github Actions are used for various tasks, including building wheels and + source distributions. +* Cirrus CI is used to build on ARM. Please note that if one of the following markers appear in the latest commit message, the following actions are taken. - ====================== =================== - Commit Message Marker Action Taken by CI - ---------------------- ------------------- - [ci skip] CI is skipped completely - [cd build] CD is run (wheels and source distribution are built) - [cd build gh] CD is run only for GitHub Actions - [lint skip] Azure pipeline skips linting - [scipy-dev] Build & test with our dependencies (numpy, scipy, etc ...) development builds - [nogil] Build & test with the nogil experimental branches of CPython, Cython, NumPy, SciPy... 
- [pypy] Build & test with PyPy - [float32] Run float32 tests by setting `SKLEARN_RUN_FLOAT32_TESTS=1`. See :ref:`environment_variable` for more details - [doc skip] Docs are not built - [doc quick] Docs built, but excludes example gallery plots - [doc build] Docs built including example gallery plots (very long) - ====================== =================== +====================== =================== +Commit Message Marker Action Taken by CI +---------------------- ------------------- +[ci skip] CI is skipped completely +[cd build] CD is run (wheels and source distribution are built) +[cd build gh] CD is run only for GitHub Actions +[cd build cirrus] CD is run only for Cirrus CI +[lint skip] Azure pipeline skips linting +[scipy-dev] Build & test with our dependencies (numpy, scipy, etc.) development builds +[nogil] Build & test with the nogil experimental branches of CPython, Cython, NumPy, SciPy, ... +[pypy] Build & test with PyPy +[pyodide] Build & test with Pyodide +[azure parallel] Run Azure CI jobs in parallel +[cirrus arm] Run Cirrus CI ARM test +[float32] Run float32 tests by setting `SKLEARN_RUN_FLOAT32_TESTS=1`. See :ref:`environment_variable` for more details +[doc skip] Docs are not built +[doc quick] Docs built, but excludes example gallery plots +[doc build] Docs built including example gallery plots (very long) +====================== =================== Note that, by default, the documentation is built but only the examples that are directly modified by the pull request are executed. @@ -686,250 +670,301 @@ We are glad to accept any sort of documentation: of scikit-learn modules, compare different algorithms or discuss their interpretation etc. Examples live in `examples/ `_ -* **other reStructuredText documents** (like this one) - provide various other - useful information (e.g., our guide to contributing) and live in +* **other reStructuredText documents** - provide various other + useful information (e.g., the :ref:`contributing` guide) and live in `doc/ `_. -You can edit the documentation using any text editor, and then generate the -HTML output by following :ref:`building_documentation`. The resulting HTML files -will be placed in ``_build/html/stable`` and are viewable in a web browser, for -instance by opening the local ``_build/html/stable/index.html`` file. +|details-start| +**Guidelines for writing docstrings** +|details-split| -.. _building_documentation: +* When documenting the parameters and attributes, here is a list of some + well-formatted examples:: -Building the documentation --------------------------- + n_clusters : int, default=3 + The number of clusters detected by the algorithm. -First, make sure you have :ref:`properly installed ` -the development version. + some_param : {'hello', 'goodbye'}, bool or int, default=True + The parameter description goes here, which can be either a string + literal (either `hello` or `goodbye`), a bool, or an int. The default + value is True. -.. - packaging is not needed once setuptools starts shipping packaging>=17.0 + array_parameter : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples,) + This parameter accepts data in either of the mentioned forms, with one + of the mentioned shapes. The default value is + `np.ones(shape=(n_samples,))`. -Building the documentation requires installing some additional packages: + list_param : list of int -.. 
prompt:: bash $ + typed_ndarray : ndarray of shape (n_samples,), dtype=np.int32 - pip install sphinx sphinx-gallery numpydoc matplotlib Pillow pandas \ - scikit-image packaging seaborn sphinx-prompt \ - sphinxext-opengraph plotly + sample_weight : array-like of shape (n_samples,), default=None -To build the documentation, you need to be in the ``doc`` folder: + multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays -.. prompt:: bash $ + In general have the following in mind: - cd doc + * Use Python basic types. (``bool`` instead of ``boolean``) + * Use parenthesis for defining shapes: ``array-like of shape (n_samples,)`` + or ``array-like of shape (n_samples, n_features)`` + * For strings with multiple options, use brackets: ``input: {'log', + 'squared', 'multinomial'}`` + * 1D or 2D data can be a subset of ``{array-like, ndarray, sparse matrix, + dataframe}``. Note that ``array-like`` can also be a ``list``, while + ``ndarray`` is explicitly only a ``numpy.ndarray``. + * Specify ``dataframe`` when "frame-like" features are being used, such as + the column names. + * When specifying the data type of a list, use ``of`` as a delimiter: ``list + of int``. When the parameter supports arrays giving details about the + shape and/or data type and a list of such arrays, you can use one of + ``array-like of shape (n_samples,) or list of such arrays``. + * When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32`` after + defining the shape: ``ndarray of shape (n_samples,), dtype=np.int32``. You + can specify multiple dtype as a set: ``array-like of shape (n_samples,), + dtype={np.float64, np.float32}``. If one wants to mention arbitrary + precision, use `integral` and `floating` rather than the Python dtype + `int` and `float`. When both `int` and `floating` are supported, there is + no need to specify the dtype. + * When the default is ``None``, ``None`` only needs to be specified at the + end with ``default=None``. Be sure to include in the docstring, what it + means for the parameter or attribute to be ``None``. -In the vast majority of cases, you only need to generate the full web site, -without the example gallery: +* Add "See Also" in docstrings for related classes/functions. -.. prompt:: bash $ +* "See Also" in docstrings should be one line per reference, with a colon and an + explanation, for example:: - make + See Also + -------- + SelectKBest : Select features based on the k highest scores. + SelectFpr : Select features based on a false positive rate test. -The documentation will be generated in the ``_build/html/stable`` directory -and are viewable in a web browser, for instance by opening the local -``_build/html/stable/index.html`` file. -To also generate the example gallery you can use: +* Add one or two snippets of code in "Example" section to show how it can be used. -.. prompt:: bash $ +|details-end| - make html +|details-start| +**Guidelines for writing the user guide and other reStructuredText documents** +|details-split| -This will run all the examples, which takes a while. If you only want to -generate a few examples, you can use: +It is important to keep a good compromise between mathematical and algorithmic +details, and give intuition to the reader on what the algorithm does. -.. prompt:: bash $ +* Begin with a concise, hand-waving explanation of what the algorithm/code does on + the data. - EXAMPLES_PATTERN=your_regex_goes_here make html +* Highlight the usefulness of the feature and its recommended application. 
+ Consider including the algorithm's complexity + (:math:`O\left(g\left(n\right)\right)`) if available, as "rules of thumb" can + be very machine-dependent. Only if those complexities are not available, then + rules of thumb may be provided instead. -This is particularly useful if you are modifying a few examples. +* Incorporate a relevant figure (generated from an example) to provide intuitions. -Set the environment variable `NO_MATHJAX=1` if you intend to view -the documentation in an offline setting. +* Include one or two short code examples to demonstrate the feature's usage. -To build the PDF manual, run: +* Introduce any necessary mathematical equations, followed by references. By + deferring the mathematical aspects, the documentation becomes more accessible + to users primarily interested in understanding the feature's practical + implications rather than its underlying mechanics. -.. prompt:: bash $ +* When editing reStructuredText (``.rst``) files, try to keep line length under + 88 characters when possible (exceptions include links and tables). - make latexpdf +* In scikit-learn reStructuredText files both single and double backticks + surrounding text will render as inline literal (often used for code, e.g., + `list`). This is due to specific configurations we have set. Single + backticks should be used nowadays. -.. warning:: **Sphinx version** +* Too much information makes it difficult for users to access the content they + are interested in. Use dropdowns to factorize it by using the following + syntax:: - While we do our best to have the documentation build under as many - versions of Sphinx as possible, the different versions tend to - behave slightly differently. To get the best results, you should - use the same version as the one we used on CircleCI. Look at this - `github search `_ - to know the exact version. + |details-start| + **Dropdown title** + |details-split| -Guidelines for writing documentation ------------------------------------- + Dropdown content. -It is important to keep a good compromise between mathematical and algorithmic -details, and give intuition to the reader on what the algorithm does. + |details-end| -Basically, to elaborate on the above, it is best to always -start with a small paragraph with a hand-waving explanation of what the -method does to the data. Then, it is very helpful to point out why the feature is -useful and when it should be used - the latter also including "big O" -(:math:`O\left(g\left(n\right)\right)`) complexities of the algorithm, as opposed -to just *rules of thumb*, as the latter can be very machine-dependent. If those -complexities are not available, then rules of thumb may be provided instead. + The snippet above will result in the following dropdown: -Secondly, a generated figure from an example (as mentioned in the previous -paragraph) should then be included to further provide some intuition. + |details-start| + **Dropdown title** + |details-split| -Next, one or two small code examples to show its use can be added. + Dropdown content. -Next, any math and equations, followed by references, -can be added to further the documentation. Not starting the -documentation with the maths makes it more friendly towards -users that are just interested in what the feature will do, as -opposed to how it works "under the hood". 
+ |details-end| -Finally, follow the formatting rules below to make it consistently good: +* Information that can be hidden by default using dropdowns is: -* Add "See Also" in docstrings for related classes/functions. + * low hierarchy sections such as `References`, `Properties`, etc. (see for + instance the subsections in :ref:`det_curve`); -* "See Also" in docstrings should be one line per reference, - with a colon and an explanation, for example:: + * in-depth mathematical details; - See Also - -------- - SelectKBest : Select features based on the k highest scores. - SelectFpr : Select features based on a false positive rate test. + * narrative that is use-case specific; -* When documenting the parameters and attributes, here is a list of some - well-formatted examples:: + * in general, narrative that may only interest users that want to go beyond + the pragmatics of a given tool. - n_clusters : int, default=3 - The number of clusters detected by the algorithm. +* Do not use dropdowns for the low level section `Examples`, as it should stay + visible to all users. Make sure that the `Examples` section comes right after + the main discussion with the least possible folded section in-between. - some_param : {'hello', 'goodbye'}, bool or int, default=True - The parameter description goes here, which can be either a string - literal (either `hello` or `goodbye`), a bool, or an int. The default - value is True. +* Be aware that dropdowns break cross-references. If that makes sense, hide the + reference along with the text mentioning it. Else, do not use dropdown. - array_parameter : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples,) - This parameter accepts data in either of the mentioned forms, with one - of the mentioned shapes. The default value is - `np.ones(shape=(n_samples,))`. +|details-end| - list_param : list of int - typed_ndarray : ndarray of shape (n_samples,), dtype=np.int32 +|details-start| +**Guidelines for writing references** +|details-split| - sample_weight : array-like of shape (n_samples,), default=None +* When bibliographic references are available with `arxiv `_ + or `Digital Object Identifier `_ identification numbers, + use the sphinx directives `:arxiv:` or `:doi:`. For example, see references in + :ref:`Spectral Clustering Graphs `. - multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays +* For "References" in docstrings, see the Silhouette Coefficient + (:func:`sklearn.metrics.silhouette_score`). - In general have the following in mind: +* To cross-reference to other pages in the scikit-learn documentation use the + reStructuredText cross-referencing syntax: - 1. Use Python basic types. (``bool`` instead of ``boolean``) - 2. Use parenthesis for defining shapes: ``array-like of shape (n_samples,)`` - or ``array-like of shape (n_samples, n_features)`` - 3. For strings with multiple options, use brackets: - ``input: {'log', 'squared', 'multinomial'}`` - 4. 1D or 2D data can be a subset of - ``{array-like, ndarray, sparse matrix, dataframe}``. Note that ``array-like`` - can also be a ``list``, while ``ndarray`` is explicitly only a ``numpy.ndarray``. - 5. Specify ``dataframe`` when "frame-like" features are being used, such - as the column names. - 6. When specifying the data type of a list, use ``of`` as a delimiter: - ``list of int``. 
When the parameter supports arrays giving details - about the shape and/or data type and a list of such arrays, you can - use one of ``array-like of shape (n_samples,) or list of such arrays``. - 7. When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32`` - after defining the shape: - ``ndarray of shape (n_samples,), dtype=np.int32``. You can specify - multiple dtype as a set: - ``array-like of shape (n_samples,), dtype={np.float64, np.float32}``. - If one wants to mention arbitrary precision, use `integral` and - `floating` rather than the Python dtype `int` and `float`. When both - `int` and `floating` are supported, there is no need to specify the - dtype. - 8. When the default is ``None``, ``None`` only needs to be specified at the - end with ``default=None``. Be sure to include in the docstring, what it - means for the parameter or attribute to be ``None``. - -* For unwritten formatting rules, try to follow existing good works: - - * When bibliographic references are available with `arxiv `_ - or `Digital Object Identifier `_ identification numbers, - use the sphinx directives `:arxiv:` or `:doi:`. For example, see references in - :ref:`Spectral Clustering Graphs `. - * For "References" in docstrings, see the Silhouette Coefficient - (:func:`sklearn.metrics.silhouette_score`). + * Section - to link to an arbitrary section in the documentation, use + reference labels (see `Sphinx docs + `_). + For example: -* When editing reStructuredText (``.rst``) files, try to keep line length under - 80 characters when possible (exceptions include links and tables). + .. code-block:: rst -* In scikit-learn reStructuredText files both single and double backticks - surrounding text will render as inline literal (often used for code, e.g., - `list`). This is due to specific configurations we have set. Single - backticks should be used nowadays. + .. _my-section: -* Before submitting your pull request check if your modifications have - introduced new sphinx warnings and try to fix them. + My section + ---------- -Cross-referencing ------------------ + This is the text of the section. + + To refer to itself use :ref:`my-section`. + + You should not modify existing sphinx reference labels as this would break + existing cross references and external links pointing to specific sections + in the scikit-learn documentation. + + * Glossary - linking to a term in the :ref:`glossary`: + + .. code-block:: rst + + :term:`cross_validation` + + * Function - to link to the documentation of a function, use the full import + path to the function: + + .. code-block:: rst + + :func:`~sklearn.model_selection.cross_val_score` + + However, if there is a `.. currentmodule::` directive above you in the document, + you will only need to use the path to the function succeeding the current + module specified. For example: + + .. code-block:: rst + + .. currentmodule:: sklearn.model_selection + + :func:`cross_val_score` + + * Class - to link to documentation of a class, use the full import path to the + class, unless there is a 'currentmodule' directive in the document above + (see above): + + .. code-block:: rst + + :class:`~sklearn.preprocessing.StandardScaler` + +|details-end| + +You can edit the documentation using any text editor, and then generate the +HTML output by following :ref:`building_documentation`. The resulting HTML files +will be placed in ``_build/html/stable`` and are viewable in a web browser, for +instance by opening the local ``_build/html/stable/index.html`` file. + + +.. 
_building_documentation: + +Building the documentation +-------------------------- -It is often useful to cross-reference to other pages in the scikit-learn -documentation. This should be done with reStructuredText cross-referencing -syntax: +**Before submitting a pull request check if your modifications have introduced +new sphinx warnings by building the documentation locally and try to fix them.** -* Section - to link to an arbitrary section in the documentation, use reference - labels (see - `Sphinx docs `_). - For example: +First, make sure you have :ref:`properly installed ` +the development version. - .. code-block:: rst +.. + packaging is not needed once setuptools starts shipping packaging>=17.0 - .. _my-section: +Building the documentation requires installing some additional packages: - My section - ---------- +.. prompt:: bash $ - This is the text of the section. + pip install sphinx sphinx-gallery numpydoc matplotlib Pillow pandas \ + polars scikit-image packaging seaborn sphinx-prompt \ + sphinxext-opengraph sphinx-copybutton plotly pooch - To refer to itself use :ref:`my-section`. +To build the documentation, you need to be in the ``doc`` folder: - You should not modify existing sphinx reference labels as this would break - existing cross references and external links pointing to specific sections in - the scikit-learn documentation. +.. prompt:: bash $ -* Glossary - linking to a term in the :ref:`glossary`: + cd doc - .. code-block:: rst +In the vast majority of cases, you only need to generate the full web site, +without the example gallery: - :term:`cross_validation` +.. prompt:: bash $ -* Function - to link to the documentation of a function, use the full - import path to the function: + make - .. code-block:: rst +The documentation will be generated in the ``_build/html/stable`` directory +and are viewable in a web browser, for instance by opening the local +``_build/html/stable/index.html`` file. +To also generate the example gallery you can use: - :func:`~sklearn.model_selection.cross_val_score` +.. prompt:: bash $ - However, if there is a 'currentmodule' directive above you in the document, - you will only need to use the path to the function succeeding the current - module specified. For example: + make html - .. code-block:: rst +This will run all the examples, which takes a while. If you only want to +generate a few examples, you can use: - .. currentmodule:: sklearn.model_selection +.. prompt:: bash $ - :func:`cross_val_score` + EXAMPLES_PATTERN=your_regex_goes_here make html + +This is particularly useful if you are modifying a few examples. + +Set the environment variable `NO_MATHJAX=1` if you intend to view +the documentation in an offline setting. + +To build the PDF manual, run: + +.. prompt:: bash $ + + make latexpdf -* Class - to link to documentation of a class, use the full import path to the - class, unless there is a 'currentmodule' directive in the document above - (see above): +.. warning:: **Sphinx version** - .. code-block:: rst + While we do our best to have the documentation build under as many + versions of Sphinx as possible, the different versions tend to + behave slightly differently. To get the best results, you should + use the same version as the one we used on CircleCI. Look at this + `GitHub search `_ + to know the exact version. - :class:`~sklearn.preprocessing.StandardScaler` .. _generated_doc_CI: @@ -962,9 +997,9 @@ subpackages. 
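When iterating on a change, it is usually enough to run `pytest` on the tests of the subpackage you are modifying; for example (a minimal sketch, the paths below are purely illustrative and depend on the files you touched):

.. prompt:: bash $

    pytest sklearn/linear_model/tests/test_ridge.py
    pytest sklearn/linear_model -k "ridge and not sparse"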
For a more detailed `pytest` workflow, please refer to the We expect code coverage of new features to be at least around 90%. - -Writing matplotlib related tests --------------------------------- +|details-start| +**Writing matplotlib related tests** +|details-split| Test fixtures ensure that a set of tests will be executing with the appropriate initialization and cleanup. The scikit-learn test suite implements a fixture @@ -983,8 +1018,11 @@ argument:: def test_requiring_mpl_fixture(pyplot): # you can now safely use matplotlib -Workflow to improve test coverage ---------------------------------- +|details-end| + +|details-start| +**Workflow to improve test coverage** +|details-split| To test code coverage, you need to install the `coverage `_ package in addition to pytest. @@ -997,6 +1035,8 @@ To test code coverage, you need to install the `coverage 3. Loop. +|details-end| + .. _monitoring_performances: Monitoring performance @@ -1190,7 +1230,7 @@ to ``zero_one`` and call ``zero_one_loss`` from that function:: If an attribute is to be deprecated, use the decorator ``deprecated`` on a property. Please note that the -``property`` decorator should be placed before the ``deprecated`` +``deprecated`` decorator should be placed before the ``property`` decorator for the docstrings to be rendered properly. E.g., renaming an attribute ``labels_`` to ``classes_`` can be done as:: @@ -1325,6 +1365,10 @@ up this process by providing your feedback. retraction. Regarding docs: typos, grammar issues and disambiguations are better addressed immediately. +|details-start| +**Important aspects to be covered in any code review** +|details-split| + Here are a few important aspects that need to be covered in any code review, from high-level questions to a more detailed check-list. @@ -1374,10 +1418,13 @@ from high-level questions to a more detailed check-list. :ref:`saved_replies` includes some frequent comments that reviewers may make. +|details-end| + .. _communication: -Communication Guidelines ------------------------- +|details-start| +**Communication Guidelines** +|details-split| Reviewing open pull requests (PRs) helps move the project forward. It is a great way to get familiar with the codebase and should motivate the @@ -1406,11 +1453,13 @@ contributor to keep involved in the project. [1]_ .. [1] Adapted from the numpy `communication guidelines `_. +|details-end| + Reading the existing code base ============================== Reading and digesting an existing code base is always a difficult exercise -that takes time and experience to main. Even though we try to write simple +that takes time and experience to master. Even though we try to write simple code in general, understanding the code can seem overwhelming at first, given the sheer size of the project. Here is a list of tips that may help make this task easier and faster (in no particular order). @@ -1447,9 +1496,10 @@ make this task easier and faster (in no particular order). `_. ``out`` is then an iterable containing the values returned by ``some_function`` for each call. - We use `Cython `_ to write fast code. Cython code is - located in ``.pyx`` and ``.pxd`` files. Cython code has a more C-like - flavor: we use pointers, perform manual memory allocation, etc. Having - some minimal experience in C / C++ is pretty much mandatory here. + located in ``.pyx`` and ``.pxd`` files. Cython code has a more C-like flavor: + we use pointers, perform manual memory allocation, etc. Having some minimal + experience in C / C++ is pretty much mandatory here. 
For more information see + :ref:`cython`. - Master your tools. - With such a big project, being efficient with your favorite editor or diff --git a/doc/developers/cython.rst b/doc/developers/cython.rst new file mode 100644 index 0000000000000..82022ddcbcc56 --- /dev/null +++ b/doc/developers/cython.rst @@ -0,0 +1,156 @@ +.. _cython: + +Cython Best Practices, Conventions and Knowledge +================================================ + +This documents tips to develop Cython code in scikit-learn. + +Tips for developing with Cython in scikit-learn +----------------------------------------------- + +Tips to ease development +^^^^^^^^^^^^^^^^^^^^^^^^ + +* Time spent reading `Cython's documentation `_ is not time lost. + +* If you intend to use OpenMP: On MacOS, system's distribution of ``clang`` does not implement OpenMP. + You can install the ``compilers`` package available on ``conda-forge`` which comes with an implementation of OpenMP. + +* Activating `checks `_ might help. E.g. for activating boundscheck use: + + .. code-block:: bash + + export SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES=1 + +* `Start from scratch in a notebook `_ to understand how to use Cython and to get feedback on your work quickly. + If you plan to use OpenMP for your implementations in your Jupyter Notebook, do add extra compiler and linkers arguments in the Cython magic. + + .. code-block:: python + + # For GCC and for clang + %%cython --compile-args=-fopenmp --link-args=-fopenmp + # For Microsoft's compilers + %%cython --compile-args=/openmp --link-args=/openmp + +* To debug C code (e.g. a segfault), do use ``gdb`` with: + + .. code-block:: bash + + gdb --ex r --args python ./entrypoint_to_bug_reproducer.py + +* To have access to some value in place to debug in ``cdef (nogil)`` context, use: + + .. code-block:: cython + + with gil: + print(state_to_print) + +* Note that Cython cannot parse f-strings with ``{var=}`` expressions, e.g. + + .. code-block:: bash + + print(f"{test_val=}") + +* scikit-learn codebase has a lot of non-unified (fused) types (re)definitions. + There currently is `ongoing work to simplify and unify that across the codebase + `_. + For now, make sure you understand which concrete types are used ultimately. + +* You might find this alias to compile individual Cython extension handy: + + .. code-block:: + + # You might want to add this alias to your shell script config. + alias cythonX="cython -X language_level=3 -X boundscheck=False -X wraparound=False -X initializedcheck=False -X nonecheck=False -X cdivision=True" + + # This generates `source.c` as if you had recompiled scikit-learn entirely. + cythonX --annotate source.pyx + +* Using the ``--annotate`` option with this flag allows generating a HTML report of code annotation. + This report indicates interactions with the CPython interpreter on a line-by-line basis. + Interactions with the CPython interpreter must be avoided as much as possible in + the computationally intensive sections of the algorithms. + For more information, please refer to `this section of Cython's tutorial `_ + + .. code-block:: + + # This generates a HTML report (`source.html`) for `source.c`. + cythonX --annotate source.pyx + +Tips for performance +^^^^^^^^^^^^^^^^^^^^ + +* Understand the GIL in context for CPython (which problems it solves, what are its limitations) + and get a good understanding of when Cython will be mapped to C code free of interactions with + CPython, when it will not, and when it cannot (e.g. 
presence of interactions with Python
+ objects, which include functions). In this regard, `PEP 703 `_
+ provides a good overview of the context and the pathways for removal.
+
+* Make sure you have deactivated `checks `_.
+
+* Always prefer memoryviews over ``cnp.ndarray`` when possible: memoryviews are lightweight.
+
+* Avoid memoryview slicing: memoryview slicing might be costly or misleading in some cases and
+ it is better not to use it, even if handling fewer dimensions in some contexts would be preferable.
+
+* Decorate final classes or methods with ``@final`` (this allows removing virtual tables when needed).
+
+* Inline methods and functions when it makes sense.
+
+* Make sure your Cython compilation units `use the recent NumPy C API `_.
+
+* If in doubt, read the generated C or C++ code if you can: "The fewer C instructions and indirections
+ for a line of Cython code, the better" is a good rule of thumb.
+
+* ``nogil`` declarations are just hints: when declaring the ``cdef`` functions
+ as nogil, it means that they can be called without holding the GIL, but it does not release
+ the GIL when entering them. You have to do that yourself either by passing ``nogil=True`` to
+ ``cython.parallel.prange`` explicitly, or by using an explicit context manager:
+
+ .. code-block:: cython
+
+ cdef inline int my_func(self) nogil:
+
+ # Some logic interacting with CPython, e.g. allocating arrays via NumPy.
+
+ with nogil:
+ # The code here is run as if it were written in C.
+
+ return 0
+
+ This item is based on `this comment from Stéfan Behnel `_
+
+* Direct calls to BLAS routines are possible via interfaces defined in ``sklearn.utils._cython_blas``.
+
+Using OpenMP
+^^^^^^^^^^^^
+
+Since scikit-learn can be built without OpenMP, it's necessary to protect each
+direct call to OpenMP.
+
+The `_openmp_helpers` module, available in
+`sklearn/utils/_openmp_helpers.pyx `_
+provides protected versions of the OpenMP routines. To use OpenMP routines, they
+must be ``cimported`` from this module and not from the OpenMP library directly:
+
+.. code-block:: cython
+
+ from sklearn.utils._openmp_helpers cimport omp_get_max_threads
+ max_threads = omp_get_max_threads()
+
+
+The parallel loop, `prange`, is already protected by Cython and can be used directly
+from `cython.parallel`.
+
+Types
+~~~~~
+
+Cython code requires the use of explicit types. This is one of the reasons you get a
+performance boost. In order to avoid code duplication, we have a central place
+for the most used types in
+`sklearn/utils/_typedefs.pxd `_.
+Ideally you start by having a look there and `cimport` the types you need, for example:
+
+.. code-block:: cython
+
+ from sklearn.utils._typedefs cimport float32, float64
diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst
index 3476e00d98fd5..97cb156da5812 100644
--- a/doc/developers/develop.rst
+++ b/doc/developers/develop.rst
@@ -54,8 +54,8 @@ multiple interfaces):
 :Transformer:
- For filtering or modifying the data, in a supervised or unsupervised
- way, implements::
+ For modifying the data in a supervised or unsupervised way (e.g. by adding, changing,
+ or removing columns, but not by adding or removing rows). Implements::
 new_data = transformer.transform(data)
@@ -282,12 +282,16 @@ the correct interface more easily.
 in the scikit-learn-contrib `project template `__.
+ It is particularly important to notice that mixins should be "on the left" while
+ the ``BaseEstimator`` should be "on the right" in the inheritance list for proper
+ MRO.
+ >>> import numpy as np >>> from sklearn.base import BaseEstimator, ClassifierMixin >>> from sklearn.utils.validation import check_X_y, check_array, check_is_fitted >>> from sklearn.utils.multiclass import unique_labels >>> from sklearn.metrics import euclidean_distances - >>> class TemplateClassifier(BaseEstimator, ClassifierMixin): + >>> class TemplateClassifier(ClassifierMixin, BaseEstimator): ... ... def __init__(self, demo_param='demo'): ... self.demo_param = demo_param @@ -349,7 +353,7 @@ The parameter `deep` will control whether or not the parameters of the subestimator__intercept_scaling -> 1 subestimator__l1_ratio -> None subestimator__max_iter -> 100 - subestimator__multi_class -> auto + subestimator__multi_class -> deprecated subestimator__n_jobs -> None subestimator__penalty -> l2 subestimator__random_state -> None @@ -414,7 +418,7 @@ trailing ``_`` is used to check if the estimator has been fitted. Cloning ------- -For use with the :mod:`model_selection` module, +For use with the :mod:`~sklearn.model_selection` module, an estimator must support the ``base.clone`` function to replicate an estimator. This can be done by providing a ``get_params`` method. If ``get_params`` is present, then ``clone(estimator)`` will be an instance of @@ -425,6 +429,31 @@ Objects that do not provide this method will be deep-copied (using the Python standard function ``copy.deepcopy``) if ``safe=False`` is passed to ``clone``. +Estimators can customize the behavior of :func:`base.clone` by defining a +`__sklearn_clone__` method. `__sklearn_clone__` must return an instance of the +estimator. `__sklearn_clone__` is useful when an estimator needs to hold on to +some state when :func:`base.clone` is called on the estimator. For example, a +frozen meta-estimator for transformers can be defined as follows:: + + class FrozenTransformer(BaseEstimator): + def __init__(self, fitted_transformer): + self.fitted_transformer = fitted_transformer + + def __getattr__(self, name): + # `fitted_transformer`'s attributes are now accessible + return getattr(self.fitted_transformer, name) + + def __sklearn_clone__(self): + return self + + def fit(self, X, y): + # Fitting does not change the state of the estimator + return self + + def fit_transform(self, X, y=None): + # fit_transform only transforms the data + return self.fitted_transformer.transform(X, y) + Pipeline compatibility ---------------------- For an estimator to be usable together with ``pipeline.Pipeline`` in any but the @@ -483,7 +512,7 @@ independent term is stored in ``intercept_``. ``sklearn.linear_model._base`` contains a few base classes and mixins that implement common linear model patterns. -The :mod:`sklearn.utils.multiclass` module contains useful functions +The :mod:`~sklearn.utils.multiclass` module contains useful functions for working with multiclass and multilabel problems. .. _estimator_tags: @@ -508,7 +537,10 @@ general only be determined at runtime. The current set of estimator tags are: allow_nan (default=False) - whether the estimator supports data with missing values encoded as np.NaN + whether the estimator supports data with missing values encoded as np.nan + +array_api_support (default=False) + whether the estimator supports Array API compatible inputs. 
binary_only (default=False) whether estimator supports binary classification but lacks multi-class @@ -540,7 +572,7 @@ pairwise (default=False) or a cross validation procedure that extracts a sub-sample of data intended for a pairwise estimator, where the data needs to be indexed on both axes. Specifically, this tag is used by - :func:`~sklearn.utils.metaestimators._safe_split` to slice rows and + `sklearn.utils.metaestimators._safe_split` to slice rows and columns. preserves_dtype (default=``[np.float64]``) @@ -681,6 +713,54 @@ only wrap the first array and not alter the other arrays. See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py` for an example on how to use the API. +.. _developer_api_check_is_fitted: + +Developer API for `check_is_fitted` +=================================== + +By default :func:`~sklearn.utils.validation.check_is_fitted` checks if there +are any attributes in the instance with a trailing underscore, e.g. `coef_`. +An estimator can change the behavior by implementing a `__sklearn_is_fitted__` +method taking no input and returning a boolean. If this method exists, +:func:`~sklearn.utils.validation.check_is_fitted` simply returns its output. + +See :ref:`sphx_glr_auto_examples_developing_estimators_sklearn_is_fitted.py` +for an example on how to use the API. + +Developer API for HTML representation +===================================== + +.. warning:: + + The HTML representation API is experimental and the API is subject to change. + +Estimators inheriting from :class:`~sklearn.base.BaseEstimator` display +a HTML representation of themselves in interactive programming +environments such as Jupyter notebooks. For instance, we can display this HTML +diagram:: + + from sklearn.base import BaseEstimator + + BaseEstimator() + +The raw HTML representation is obtained by invoking the function +:func:`~sklearn.utils.estimator_html_repr` on an estimator instance. + +To customize the URL linking to an estimator's documentation (i.e. when clicking on the +"?" icon), override the `_doc_link_module` and `_doc_link_template` attributes. In +addition, you can provide a `_doc_link_url_param_generator` method. Set +`_doc_link_module` to the name of the (top level) module that contains your estimator. +If the value does not match the top level module name, the HTML representation will not +contain a link to the documentation. For scikit-learn estimators this is set to +`"sklearn"`. + +The `_doc_link_template` is used to construct the final URL. By default, it can contain +two variables: `estimator_module` (the full name of the module containing the estimator) +and `estimator_name` (the class name of the estimator). If you need more variables you +should implement the `_doc_link_url_param_generator` method which should return a +dictionary of the variables and their values. This dictionary will be used to render the +`_doc_link_template`. + .. _coding-guidelines: Coding guidelines @@ -827,7 +907,7 @@ Numerical assertions in tests ----------------------------- When asserting the quasi-equality of arrays of continuous values, -do use :func:`sklearn.utils._testing.assert_allclose`. +do use `sklearn.utils._testing.assert_allclose`. The relative tolerance is automatically inferred from the provided arrays dtypes (for float32 and float64 dtypes in particular) but you can override @@ -837,4 +917,4 @@ When comparing arrays of zero-elements, please do provide a non-zero value for the absolute tolerance via ``atol``. 
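For instance, a minimal sketch of such an assertion (the values are made up for illustration)::

    import numpy as np
    from sklearn.utils._testing import assert_allclose

    # Residuals that should be zero up to rounding errors: a non-zero `atol`
    # is needed because a purely relative tolerance is meaningless when the
    # expected value is exactly zero.
    residuals = np.array([1e-15, -2e-16, 0.0])
    assert_allclose(residuals, np.zeros_like(residuals), atol=1e-12)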
For more information, please refer to the docstring of -:func:`sklearn.utils._testing.assert_allclose`. +`sklearn.utils._testing.assert_allclose`. diff --git a/doc/developers/index.rst b/doc/developers/index.rst index bd1ee815e25bf..c2cc35928cbf9 100644 --- a/doc/developers/index.rst +++ b/doc/developers/index.rst @@ -19,6 +19,7 @@ Developer's Guide tips utilities performance + cython advanced_installation bug_triaging maintainer diff --git a/doc/developers/maintainer.rst b/doc/developers/maintainer.rst index 41fd571ae0389..70d132d2af604 100644 --- a/doc/developers/maintainer.rst +++ b/doc/developers/maintainer.rst @@ -17,6 +17,11 @@ Before a release 1. Update authors table: + Create a `classic token on GitHub `_ + with the ``read:org`` following permission. + + Run the following script, entering the token in: + .. prompt:: bash $ cd build_tools; make authors; cd .. @@ -43,14 +48,16 @@ Before a release **Permissions** -The release manager requires a set of permissions on top of the usual -permissions given to maintainers, which includes: +The release manager must be a *maintainer* of the ``scikit-learn/scikit-learn`` +repository to be able to publish on ``pypi.org`` and ``test.pypi.org`` +(via a manual trigger of a dedicated Github Actions workflow). -- *maintainer* role on ``scikit-learn`` projects on ``pypi.org`` and - ``test.pypi.org``, separately. -- become a member of the *scikit-learn* team on conda-forge by editing the - ``recipe/meta.yaml`` file on - ``https://github.com/conda-forge/scikit-learn-feedstock`` +The release manager does not need extra permissions on ``pypi.org`` to publish a +release in particular. + +The release manager must be a *maintainer* of the ``conda-forge/scikit-learn-feedstock`` +repository. This can be changed by editing the ``recipe/meta.yaml`` file in the +first release pull-request. .. _preparing_a_release_pr: @@ -74,16 +81,16 @@ tag under that branch. This is done only once, as the major and minor releases happen on the same branch: - .. prompt:: bash $ +.. prompt:: bash $ - # Assuming upstream is an alias for the main scikit-learn repo: - git fetch upstream main - git checkout upstream/main - git checkout -b 0.99.X - git push --set-upstream upstream 0.99.X + # Assuming upstream is an alias for the main scikit-learn repo: + git fetch upstream main + git checkout upstream/main + git checkout -b 0.99.X + git push --set-upstream upstream 0.99.X - Again, `X` is literal here, and `99` is replaced by the release number. - The branches are called ``0.19.X``, ``0.20.X``, etc. +Again, `X` is literal here, and `99` is replaced by the release number. +The branches are called ``0.19.X``, ``0.20.X``, etc. In terms of including changes, the first RC ideally counts as a *feature freeze*. Each coming release candidate and the final release afterwards will @@ -98,43 +105,82 @@ in the description of the Pull Request to track progress. This PR will be used to push commits related to the release as explained in :ref:`making_a_release`. -You can also create a second PR from main and targeting main to increment -the ``__version__`` variable in `sklearn/__init__.py` to increment the dev -version. This means while we're in the release candidate period, the latest -stable is two versions behind the main branch, instead of one. In this PR -targeting main you should also include a new file for the matching version -under the ``doc/whats_new/`` folder so PRs that target the next version can -contribute their changelog entries to this file in parallel to the release -process. 
+You can also create a second PR from main and targeting main to increment the +``__version__`` variable in `sklearn/__init__.py` and in `pyproject.toml` to increment +the dev version. This means while we're in the release candidate period, the latest +stable is two versions behind the main branch, instead of one. In this PR targeting +main you should also include a new file for the matching version under the +``doc/whats_new/`` folder so PRs that target the next version can contribute their +changelog entries to this file in parallel to the release process. -Minor version release -~~~~~~~~~~~~~~~~~~~~~ +Minor version release (also known as bug-fix release) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The minor releases should include bug fixes and some relevant documentation changes only. Any PR resulting in a behavior change which is not a bug fix -should be excluded. +should be excluded. As an example, instructions are given for the `1.2.2` release. -First, create a branch, **on your own fork** (to release e.g. `0.99.3`): +- Create a branch, **on your own fork** (here referred to as `fork`) for the release + from `upstream/main`. -.. prompt:: bash $ + .. prompt:: bash $ - # assuming main and upstream/main are the same - git checkout -b release-0.99.3 main + git fetch upstream/main + git checkout -b release-1.2.2 upstream/main + git push -u fork release-1.2.2:release-1.2.2 -Then, create a PR **to the** `scikit-learn/0.99.X` **branch** (not to -main!) with all the desired changes: +- Create a **draft** PR to the `upstream/1.2.X` branch (not to `upstream/main`) + with all the desired changes. -.. prompt:: bash $ +- Do not push anything on that branch yet. + +- Locally rebase `release-1.2.2` from the `upstream/1.2.X` branch using: + + .. prompt:: bash $ + + git rebase -i upstream/1.2.X + + This will open an interactive rebase with the `git-rebase-todo` containing all + the latest commits on `main`. At this stage, you have to perform + this interactive rebase with at least one other person (three people rebasing + together is even better, so as not to forget anything and to avoid any doubt). + + - **Do not remove lines; drop commits by replacing** ``pick`` **with** ``drop`` - git rebase -i upstream/0.99.2 + - Commits to pick for bug-fix release *generally* are prefixed with: `FIX`, `CI`, + `DOC`. They should at least include all the commits of the merged PRs + that were milestoned for this release on GitHub and/or documented as such in + the changelog. It's likely that some bugfixes were documented in the + changelog of the main major release instead of the next bugfix release, + in which case, the matching changelog entries will need to be moved, + first in the `main` branch then backported in the release PR. -Copy the :ref:`release_checklist` templates in the description of the Pull -Request to track progress. + - Commits to drop for bug-fix release *generally* are prefixed with: `FEAT`, + `MAINT`, `ENH`, `API`. The reason for not including them is to prevent changes of + behavior (which must only appear in breaking or major releases). -Do not forget to add a commit updating ``sklearn.__version__``. + - After having dropped or picked commits, **do not exit**, but paste the content + of the `git-rebase-todo` message in the PR. + This file is located at `.git/rebase-merge/git-rebase-todo`. -It's nice to have a copy of the ``git rebase -i`` log in the PR to help others -understand what's included. + - Save and exit, starting the interactive rebase. + + - Resolve merge conflicts when they happen.
+ +- Force push the result of the rebase and the extra release commits to the release PR: + + .. prompt:: bash $ + + git push -f fork release-1.2.2:release-1.2.2 + +- Copy the :ref:`release_checklist` template and paste it in the description of the + Pull Request to track progress. + +- Review all the commits included in the release to make sure that they do not + introduce any new feature. We should not blindly trust the commit message prefixes. + +- Remove the draft status of the release PR and invite other maintainers to review the + list of included commits. .. _making_a_release: @@ -161,10 +207,12 @@ Making a release - Update the release date in ``whats_new.rst`` - Edit the ``doc/templates/index.html`` to change the 'News' entry of the - front page (with the release month as well). + front page (with the release month as well). Do not forget to remove + the old entries (two years or three releases are typically good + enough) and to update the on-going development entry. -2. On the branch for releasing, update the version number in - ``sklearn/__init__.py``, the ``__version__``. +2. On the branch for releasing, update the version number in ``sklearn/__init__.py``, + the ``__version__`` variable, and in `pyproject.toml`. For major releases, please add a 0 at the end: `0.99.0` instead of `0.99`. @@ -301,7 +349,7 @@ The following GitHub checklist might be helpful in a release PR:: * [ ] update news and what's new date in release branch * [ ] update news and what's new date and sklearn dev0 version in main branch - * [ ] check that the for the release wheels can be built successfully + * [ ] check that the wheels for the release can be built successfully * [ ] merge the PR with `[cd build]` commit message to upload wheels to the staging repo * [ ] upload the wheels and source tarball to https://test.pypi.org * [ ] create tag on the main github repo @@ -310,6 +358,9 @@ The following GitHub checklist might be helpful in a release PR:: * [ ] upload the wheels and source tarball to PyPI * [ ] https://github.com/scikit-learn/scikit-learn/releases publish (except for RC) * [ ] announce on mailing list and on Twitter, and LinkedIn + * [ ] update symlink for stable in + https://github.com/scikit-learn/scikit-learn.github.io (only major/minor) + * [ ] update SECURITY.md in main branch (except for RC) Merging Pull Requests --------------------- @@ -325,44 +376,20 @@ Before merging, the `Co-authored-by: name ` tags in the detailed description. This will mark the PR as having `multiple co-authors `_. - Whether code contributions are significanly enough to merit co-authorship is + Whether code contributions are significantly enough to merit co-authorship is left to the maintainer's discretion, same as for the "what's new" entry. The scikit-learn.org web site ----------------------------- -The scikit-learn web site (http://scikit-learn.org) is hosted at GitHub, +The scikit-learn web site (https://scikit-learn.org) is hosted at GitHub, but should rarely be updated manually by pushing to the https://github.com/scikit-learn/scikit-learn.github.io repository. Most updates can be made by pushing to master (for /dev) or a release branch like 0.99.X, from which Circle CI builds and uploads the documentation automatically. -Travis Cron jobs ----------------- - -From ``_: Travis CI cron jobs work -similarly to the cron utility, they run builds at regular scheduled intervals -independently of whether any commits were pushed to the repository. 
Cron jobs -always fetch the most recent commit on a particular branch and build the project -at that state. Cron jobs can run daily, weekly or monthly, which in practice -means up to an hour after the selected time span, and you cannot set them to run -at a specific time. - -For scikit-learn, Cron jobs are used for builds that we do not want to run in -each PR. As an example the build with the dev versions of numpy and scipy is -run as a Cron job. Most of the time when this numpy-dev build fail, it is -related to a numpy change and not a scikit-learn one, so it would not make sense -to blame the PR author for the Travis failure. - -The definition of what gets run in the Cron job is done in the .travis.yml -config file, exactly the same way as the other Travis jobs. We use a ``if: type -= cron`` filter in order for the build to be run only in Cron jobs. - -The branch targeted by the Cron job and the frequency of the Cron job is set -via the web UI at https://www.travis-ci.org/scikit-learn/scikit-learn/settings. - Experimental features --------------------- @@ -371,8 +398,8 @@ experimental features / estimators that are subject to change without deprecation cycle. To create an experimental module, you can just copy and modify the content of -`enable_hist_gradient_boosting.py -`__, +`enable_halving_search_cv.py +`__, or `enable_iterative_imputer.py `_. diff --git a/doc/developers/minimal_reproducer.rst b/doc/developers/minimal_reproducer.rst index 2cc82d083aaf1..b100bccbaa6b4 100644 --- a/doc/developers/minimal_reproducer.rst +++ b/doc/developers/minimal_reproducer.rst @@ -88,9 +88,9 @@ The following code, while **still not minimal**, is already **much better** because it can be copy-pasted in a Python terminal to reproduce the problem in one step. In particular: - - it contains **all necessary imports statements**; - - it can fetch the public dataset without having to manually download a - file and put it in the expected location on the disk. +- it contains **all necessary imports statements**; +- it can fetch the public dataset without having to manually download a + file and put it in the expected location on the disk. **Improved example** @@ -199,21 +199,21 @@ As already mentioned, the key to communication is the readability of the code and good formatting can really be a plus. Notice that in the previous snippet we: - - try to limit all lines to a maximum of 79 characters to avoid horizontal - scrollbars in the code snippets blocks rendered on the GitHub issue; - - use blank lines to separate groups of related functions; - - place all the imports in their own group at the beginning. +- try to limit all lines to a maximum of 79 characters to avoid horizontal + scrollbars in the code snippets blocks rendered on the GitHub issue; +- use blank lines to separate groups of related functions; +- place all the imports in their own group at the beginning. The simplification steps presented in this guide can be implemented in a different order than the progression we have shown here. The important points are: - - a minimal reproducer should be runnable by a simple copy-and-paste in a - python terminal; - - it should be simplified as much as possible by removing any code steps - that are not strictly needed to reproducing the original problem; - - it should ideally only rely on a minimal dataset generated on-the-fly by - running the code instead of relying on external data, if possible. 
+- a minimal reproducer should be runnable by a simple copy-and-paste in a + python terminal; +- it should be simplified as much as possible by removing any code steps + that are not strictly needed to reproducing the original problem; +- it should ideally only rely on a minimal dataset generated on-the-fly by + running the code instead of relying on external data, if possible. Use markdown formatting @@ -305,50 +305,50 @@ can be used to create dummy numeric data. - regression - Regressions take continuous numeric data as features and target. + Regressions take continuous numeric data as features and target. - .. code-block:: python + .. code-block:: python - import numpy as np + import numpy as np - rng = np.random.RandomState(0) - n_samples, n_features = 5, 5 - X = rng.randn(n_samples, n_features) - y = rng.randn(n_samples) + rng = np.random.RandomState(0) + n_samples, n_features = 5, 5 + X = rng.randn(n_samples, n_features) + y = rng.randn(n_samples) A similar snippet can be used as synthetic data when testing scaling tools such as :class:`sklearn.preprocessing.StandardScaler`. - classification - If the bug is not raised during when encoding a categorical variable, you can - feed numeric data to a classifier. Just remember to ensure that the target - is indeed an integer. + If the bug is not raised during when encoding a categorical variable, you can + feed numeric data to a classifier. Just remember to ensure that the target + is indeed an integer. - .. code-block:: python + .. code-block:: python - import numpy as np + import numpy as np - rng = np.random.RandomState(0) - n_samples, n_features = 5, 5 - X = rng.randn(n_samples, n_features) - y = rng.randint(0, 2, n_samples) # binary target with values in {0, 1} + rng = np.random.RandomState(0) + n_samples, n_features = 5, 5 + X = rng.randn(n_samples, n_features) + y = rng.randint(0, 2, n_samples) # binary target with values in {0, 1} - If the bug only happens with non-numeric class labels, you might want to - generate a random target with `numpy.random.choice - `_. + If the bug only happens with non-numeric class labels, you might want to + generate a random target with `numpy.random.choice + `_. - .. code-block:: python + .. code-block:: python - import numpy as np + import numpy as np - rng = np.random.RandomState(0) - n_samples, n_features = 50, 5 - X = rng.randn(n_samples, n_features) - y = np.random.choice( - ["male", "female", "other"], size=n_samples, p=[0.49, 0.49, 0.02] - ) + rng = np.random.RandomState(0) + n_samples, n_features = 50, 5 + X = rng.randn(n_samples, n_features) + y = np.random.choice( + ["male", "female", "other"], size=n_samples, p=[0.49, 0.49, 0.02] + ) Pandas ------ diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst index c6fcc99b26102..42687945a2bba 100644 --- a/doc/developers/performance.rst +++ b/doc/developers/performance.rst @@ -46,31 +46,31 @@ Sometimes however an algorithm cannot be expressed efficiently in simple vectorized Numpy code. In this case, the recommended strategy is the following: - 1. **Profile** the Python implementation to find the main bottleneck and - isolate it in a **dedicated module level function**. This function - will be reimplemented as a compiled extension module. - - 2. 
If there exists a well maintained BSD or MIT **C/C++** implementation - of the same algorithm that is not too big, you can write a - **Cython wrapper** for it and include a copy of the source code - of the library in the scikit-learn source tree: this strategy is - used for the classes :class:`svm.LinearSVC`, :class:`svm.SVC` and - :class:`linear_model.LogisticRegression` (wrappers for liblinear - and libsvm). - - 3. Otherwise, write an optimized version of your Python function using - **Cython** directly. This strategy is used - for the :class:`linear_model.ElasticNet` and - :class:`linear_model.SGDClassifier` classes for instance. - - 4. **Move the Python version of the function in the tests** and use - it to check that the results of the compiled extension are consistent - with the gold standard, easy to debug Python version. - - 5. Once the code is optimized (not simple bottleneck spottable by - profiling), check whether it is possible to have **coarse grained - parallelism** that is amenable to **multi-processing** by using the - ``joblib.Parallel`` class. +1. **Profile** the Python implementation to find the main bottleneck and + isolate it in a **dedicated module level function**. This function + will be reimplemented as a compiled extension module. + +2. If there exists a well maintained BSD or MIT **C/C++** implementation + of the same algorithm that is not too big, you can write a + **Cython wrapper** for it and include a copy of the source code + of the library in the scikit-learn source tree: this strategy is + used for the classes :class:`svm.LinearSVC`, :class:`svm.SVC` and + :class:`linear_model.LogisticRegression` (wrappers for liblinear + and libsvm). + +3. Otherwise, write an optimized version of your Python function using + **Cython** directly. This strategy is used + for the :class:`linear_model.ElasticNet` and + :class:`linear_model.SGDClassifier` classes for instance. + +4. **Move the Python version of the function in the tests** and use + it to check that the results of the compiled extension are consistent + with the gold standard, easy to debug Python version. + +5. Once the code is optimized (not simple bottleneck spottable by + profiling), check whether it is possible to have **coarse grained + parallelism** that is amenable to **multi-processing** by using the + ``joblib.Parallel`` class. When using Cython, use either @@ -187,7 +187,7 @@ us install ``line_profiler`` and wire it to IPython: pip install line_profiler -- **Under IPython 0.13+**, first create a configuration profile: +**Under IPython 0.13+**, first create a configuration profile: .. prompt:: bash $ @@ -265,7 +265,7 @@ install the latest version: Then, setup the magics in a manner similar to ``line_profiler``. -- **Under IPython 0.11+**, first create a configuration profile: +**Under IPython 0.11+**, first create a configuration profile: .. prompt:: bash $ @@ -313,8 +313,8 @@ For more details, see the docstrings of the magics, using ``%memit?`` and ``%mprun?``. -Performance tips for the Cython developer -========================================= +Using Cython +============ If profiling of the Python code reveals that the Python interpreter overhead is larger by one order of magnitude or more than the cost of the @@ -325,46 +325,9 @@ standalone function in a ``.pyx`` file, add static type declarations and then use Cython to generate a C program suitable to be compiled as a Python extension module. 
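For illustration, here is a minimal sketch of what such a statically typed helper could look like. This is a hypothetical ``.pyx`` module written for this guide, not code taken from the scikit-learn code base::

    # _fast_helpers.pyx -- hypothetical example for illustration only
    cimport cython
    import numpy as np


    @cython.boundscheck(False)
    @cython.wraparound(False)
    def row_sums(const double[:, ::1] X):
        """Sum each row of a C-contiguous float64 array."""
        cdef Py_ssize_t i, j
        cdef Py_ssize_t n_samples = X.shape[0]
        cdef Py_ssize_t n_features = X.shape[1]
        out = np.zeros(n_samples, dtype=np.float64)
        cdef double[::1] out_view = out
        for i in range(n_samples):
            for j in range(n_features):
                out_view[i] += X[i, j]
        return out

Keeping the original, pure Python version of such a function in the test suite makes it easy to check that the compiled extension returns the same results, as recommended in point 4 above.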
-The official documentation available at http://docs.cython.org/ contains -a tutorial and reference guide for developing such a module. In the -following we will just highlight a couple of tricks that we found -important in practice on the existing cython codebase in the scikit-learn -project. - -TODO: html report, type declarations, bound checks, division by zero checks, -memory alignment, direct blas calls... - -- https://www.youtube.com/watch?v=gMvkiQ-gOW8 -- http://conference.scipy.org/proceedings/SciPy2009/paper_1/ -- http://conference.scipy.org/proceedings/SciPy2009/paper_2/ - -Using OpenMP ------------- - -Since scikit-learn can be built without OpenMP, it's necessary to protect each -direct call to OpenMP. - -There are some helpers in -[sklearn/utils/_openmp_helpers.pyx](https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/utils/_openmp_helpers.pyx) -that you can reuse for the main useful functionalities and already have the -necessary protection to be built without OpenMP. - -If the helpers are not enough, you need to protect your OpenMP code using the -following syntax:: - - # importing OpenMP - IF SKLEARN_OPENMP_PARALLELISM_ENABLED: - cimport openmp - - # calling OpenMP - IF SKLEARN_OPENMP_PARALLELISM_ENABLED: - max_threads = openmp.omp_get_max_threads() - ELSE: - max_threads = 1 - -.. note:: - - Protecting the parallel loop, ``prange``, is already done by cython. +The `Cython's documentation `_ contains a tutorial and +reference guide for developing such a module. +For more information about developing in Cython for scikit-learn, see :ref:`cython`. .. _profiling-compiled-extension: @@ -383,7 +346,29 @@ Using yep and gperftools Easy profiling without special compilation options use yep: - https://pypi.org/project/yep/ -- http://fa.bianp.net/blog/2011/a-profiler-for-python-extensions +- https://fa.bianp.net/blog/2011/a-profiler-for-python-extensions + +Using a debugger, gdb +--------------------- + +* It is helpful to use ``gdb`` to debug. In order to do so, one must use + a Python interpreter built with debug support (debug symbols and proper + optimization). To create a new conda environment (which you might need + to deactivate and reactivate after building/installing) with a source-built + CPython interpreter: + + .. code-block:: bash + + git clone https://github.com/python/cpython.git + conda create -n debug-scikit-dev + conda activate debug-scikit-dev + cd cpython + mkdir debug + cd debug + ../configure --prefix=$CONDA_PREFIX --with-pydebug + make EXTRA_CFLAGS='-DPy_DEBUG' -j + make install + Using gprof ----------- diff --git a/doc/developers/plotting.rst b/doc/developers/plotting.rst index b0e8b3b43ee45..9acc3ef4a5061 100644 --- a/doc/developers/plotting.rst +++ b/doc/developers/plotting.rst @@ -8,7 +8,7 @@ Scikit-learn defines a simple API for creating visualizations for machine learning. The key features of this API is to run calculations once and to have the flexibility to adjust the visualizations after the fact. This section is intended for developers who wish to develop or maintain plotting tools. For -usage, users should refer to the :ref`User Guide `. +usage, users should refer to the :ref:`User Guide `. Plotting API Overview --------------------- @@ -87,7 +87,7 @@ be placed. In this case, we suggest using matplotlib's By default, the `ax` keyword in `plot` is `None`. In this case, the single axes is created and the gridspec api is used to create the regions to plot in. 
-See for example, :func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator +See for example, :meth:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` which plots multiple lines and contours using this API. The axes defining the bounding box is saved in a `bounding_ax_` attribute. The individual axes created are stored in an `axes_` ndarray, corresponding to the axes position on diff --git a/doc/developers/tips.rst b/doc/developers/tips.rst index 7bef6580c1a6e..3dbc35cec68d0 100644 --- a/doc/developers/tips.rst +++ b/doc/developers/tips.rst @@ -73,27 +73,25 @@ will run all :term:`common tests` for the ``LogisticRegression`` estimator. When a unit test fails, the following tricks can make debugging easier: - 1. The command line argument ``pytest -l`` instructs pytest to print the local - variables when a failure occurs. +1. The command line argument ``pytest -l`` instructs pytest to print the local + variables when a failure occurs. - 2. The argument ``pytest --pdb`` drops into the Python debugger on failure. To - instead drop into the rich IPython debugger ``ipdb``, you may set up a - shell alias to: +2. The argument ``pytest --pdb`` drops into the Python debugger on failure. To + instead drop into the rich IPython debugger ``ipdb``, you may set up a + shell alias to: -.. prompt:: bash $ + .. prompt:: bash $ - pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no + pytest --pdbcls=IPython.terminal.debugger:TerminalPdb --capture no Other `pytest` options that may become useful include: - - ``-x`` which exits on the first failed test - - ``--lf`` to rerun the tests that failed on the previous run - - ``--ff`` to rerun all previous tests, running the ones that failed first - - ``-s`` so that pytest does not capture the output of ``print()`` - statements - - ``--tb=short`` or ``--tb=line`` to control the length of the logs - - ``--runxfail`` also run tests marked as a known failure (XFAIL) and report - errors. +- ``-x`` which exits on the first failed test, +- ``--lf`` to rerun the tests that failed on the previous run, +- ``--ff`` to rerun all previous tests, running the ones that failed first, +- ``-s`` so that pytest does not capture the output of ``print()`` statements, +- ``--tb=short`` or ``--tb=line`` to control the length of the logs, +- ``--runxfail`` also run tests marked as a known failure (XFAIL) and report errors. Since our continuous integration tests will error if ``FutureWarning`` isn't properly caught, @@ -114,113 +112,135 @@ replies `_ for reviewing: Note that putting this content on a single line in a literal is the easiest way to make it copyable and wrapped on screen. Issue: Usage questions - :: - You are asking a usage question. The issue tracker is for bugs and new features. For usage questions, it is recommended to try [Stack Overflow](https://stackoverflow.com/questions/tagged/scikit-learn) or [the Mailing List](https://mail.python.org/mailman/listinfo/scikit-learn). +:: + + You are asking a usage question. The issue tracker is for bugs and new features. For usage questions, it is recommended to try [Stack Overflow](https://stackoverflow.com/questions/tagged/scikit-learn) or [the Mailing List](https://mail.python.org/mailman/listinfo/scikit-learn). - Unfortunately, we need to close this issue as this issue tracker is a communication tool used for the development of scikit-learn. The additional activity created by usage questions crowds it too much and impedes this development. 
The conversation can continue here, however there is no guarantee that is will receive attention from core developers. + Unfortunately, we need to close this issue as this issue tracker is a communication tool used for the development of scikit-learn. The additional activity created by usage questions crowds it too much and impedes this development. The conversation can continue here, however there is no guarantee that it will receive attention from core developers. Issue: You're welcome to update the docs - :: - Please feel free to offer a pull request updating the documentation if you feel it could be improved. +:: + + Please feel free to offer a pull request updating the documentation if you feel it could be improved. Issue: Self-contained example for bug - :: - Please provide [self-contained example code](https://stackoverflow.com/help/mcve), including imports and data (if possible), so that other contributors can just run it and reproduce your issue. Ideally your example code should be minimal. +:: + + Please provide [self-contained example code](https://scikit-learn.org/dev/developers/minimal_reproducer.html), including imports and data (if possible), so that other contributors can just run it and reproduce your issue. Ideally your example code should be minimal. Issue: Software versions - :: - To help diagnose your issue, please paste the output of: - ```py - import sklearn; sklearn.show_versions() - ``` - Thanks. +:: + + To help diagnose your issue, please paste the output of: + ```py + import sklearn; sklearn.show_versions() + ``` + Thanks. Issue: Code blocks - :: - Readability can be greatly improved if you [format](https://help.github.com/articles/creating-and-highlighting-code-blocks/) your code snippets and complete error messages appropriately. For example: +:: + + Readability can be greatly improved if you [format](https://help.github.com/articles/creating-and-highlighting-code-blocks/) your code snippets and complete error messages appropriately. For example: - ```python - print(something) - ``` - generates: ```python print(something) ``` - And: - - ```pytb - Traceback (most recent call last): - File "", line 1, in - ImportError: No module named 'hello' - ``` - generates: + + generates: + + ```python + print(something) + ``` + + And: + ```pytb Traceback (most recent call last): - File "", line 1, in + File "", line 1, in ImportError: No module named 'hello' ``` - You can edit your issue descriptions and comments at any time to improve readability. This helps maintainers a lot. Thanks! + + generates: + + ```pytb + Traceback (most recent call last): + File "", line 1, in + ImportError: No module named 'hello' + ``` + + You can edit your issue descriptions and comments at any time to improve readability. This helps maintainers a lot. Thanks! Issue/Comment: Linking to code - :: - Friendly advice: for clarity's sake, you can link to code like [this](https://help.github.com/articles/creating-a-permanent-link-to-a-code-snippet/). +:: + + Friendly advice: for clarity's sake, you can link to code like [this](https://help.github.com/articles/creating-a-permanent-link-to-a-code-snippet/). Issue/Comment: Linking to comments - :: - Please use links to comments, which make it a lot easier to see what you are referring to, rather than just linking to the issue. See [this](https://stackoverflow.com/questions/25163598/how-do-i-reference-a-specific-issue-comment-on-github) for more details. 
+:: + + Please use links to comments, which make it a lot easier to see what you are referring to, rather than just linking to the issue. See [this](https://stackoverflow.com/questions/25163598/how-do-i-reference-a-specific-issue-comment-on-github) for more details. PR-NEW: Better description and title - :: - Thanks for the pull request! Please make the title of the PR more descriptive. The title will become the commit message when this is merged. You should state what issue (or PR) it fixes/resolves in the description using the syntax described [here](http://scikit-learn.org/dev/developers/contributing.html#contributing-pull-requests). +:: + + Thanks for the pull request! Please make the title of the PR more descriptive. The title will become the commit message when this is merged. You should state what issue (or PR) it fixes/resolves in the description using the syntax described [here](https://scikit-learn.org/dev/developers/contributing.html#contributing-pull-requests). PR-NEW: Fix # - :: - Please use "Fix #issueNumber" in your PR description (and you can do it more than once). This way the associated issue gets closed automatically when the PR is merged. For more details, look at [this](https://github.com/blog/1506-closing-issues-via-pull-requests). +:: + + Please use "Fix #issueNumber" in your PR description (and you can do it more than once). This way the associated issue gets closed automatically when the PR is merged. For more details, look at [this](https://github.com/blog/1506-closing-issues-via-pull-requests). PR-NEW or Issue: Maintenance cost - :: - Every feature we include has a [maintenance cost](http://scikit-learn.org/dev/faq.html#why-are-you-so-selective-on-what-algorithms-you-include-in-scikit-learn). Our maintainers are mostly volunteers. For a new feature to be included, we need evidence that it is often useful and, ideally, [well-established](http://scikit-learn.org/dev/faq.html#what-are-the-inclusion-criteria-for-new-algorithms) in the literature or in practice. Also, we expect PR authors to take part in the maintenance for the code they submit, at least initially. That doesn't stop you implementing it for yourself and publishing it in a separate repository, or even [scikit-learn-contrib](https://scikit-learn-contrib.github.io). +:: + + Every feature we include has a [maintenance cost](https://scikit-learn.org/dev/faq.html#why-are-you-so-selective-on-what-algorithms-you-include-in-scikit-learn). Our maintainers are mostly volunteers. For a new feature to be included, we need evidence that it is often useful and, ideally, [well-established](https://scikit-learn.org/dev/faq.html#what-are-the-inclusion-criteria-for-new-algorithms) in the literature or in practice. Also, we expect PR authors to take part in the maintenance for the code they submit, at least initially. That doesn't stop you implementing it for yourself and publishing it in a separate repository, or even [scikit-learn-contrib](https://scikit-learn-contrib.github.io). PR-WIP: What's needed before merge? - :: - Please clarify (perhaps as a TODO list in the PR description) what work you believe still needs to be done before it can be reviewed for merge. When it is ready, please prefix the PR title with `[MRG]`. +:: + + Please clarify (perhaps as a TODO list in the PR description) what work you believe still needs to be done before it can be reviewed for merge. When it is ready, please prefix the PR title with `[MRG]`. 
PR-WIP: Regression test needed - :: - Please add a [non-regression test](https://en.wikipedia.org/wiki/Non-regression_testing) that would fail at main but pass in this PR. +:: + + Please add a [non-regression test](https://en.wikipedia.org/wiki/Non-regression_testing) that would fail at main but pass in this PR. PR-WIP: PEP8 - :: - You have some [PEP8](https://www.python.org/dev/peps/pep-0008/) violations, whose details you can see in the Circle CI `lint` job. It might be worth configuring your code editor to check for such errors on the fly, so you can catch them before committing. +:: + + You have some [PEP8](https://www.python.org/dev/peps/pep-0008/) violations, whose details you can see in the Circle CI `lint` job. It might be worth configuring your code editor to check for such errors on the fly, so you can catch them before committing. PR-MRG: Patience - :: - Before merging, we generally require two core developers to agree that your pull request is desirable and ready. [Please be patient](http://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention), as we mostly rely on volunteered time from busy core developers. (You are also welcome to help us out with [reviewing other PRs](http://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines).) +:: + + Before merging, we generally require two core developers to agree that your pull request is desirable and ready. [Please be patient](https://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention), as we mostly rely on volunteered time from busy core developers. (You are also welcome to help us out with [reviewing other PRs](https://scikit-learn.org/dev/developers/contributing.html#code-review-guidelines).) PR-MRG: Add to what's new - :: - Please add an entry to the change log at `doc/whats_new/v*.rst`. Like the other entries there, please reference this pull request with `:pr:` and credit yourself (and other contributors if applicable) with `:user:`. +:: + + Please add an entry to the change log at `doc/whats_new/v*.rst`. Like the other entries there, please reference this pull request with `:pr:` and credit yourself (and other contributors if applicable) with `:user:`. PR: Don't change unrelated - :: - Please do not change unrelated lines. It makes your contribution harder to review and may introduce merge conflicts to other pull requests. +:: + + Please do not change unrelated lines. It makes your contribution harder to review and may introduce merge conflicts to other pull requests. .. highlight:: default @@ -244,21 +264,21 @@ valgrind_. Valgrind is a command-line tool that can trace memory errors in a variety of code. Follow these steps: - 1. Install `valgrind`_ on your system. +1. Install `valgrind`_ on your system. - 2. Download the python valgrind suppression file: `valgrind-python.supp`_. +2. Download the python valgrind suppression file: `valgrind-python.supp`_. - 3. Follow the directions in the `README.valgrind`_ file to customize your - python suppressions. If you don't, you will have spurious output coming - related to the python interpreter instead of your own code. +3. Follow the directions in the `README.valgrind`_ file to customize your + python suppressions. If you don't, you will have spurious output coming + related to the python interpreter instead of your own code. - 4. Run valgrind as follows: +4. Run valgrind as follows: -.. prompt:: bash $ + .. 
prompt:: bash $ - valgrind -v --suppressions=valgrind-python.supp python my_test_script.py + valgrind -v --suppressions=valgrind-python.supp python my_test_script.py -.. _valgrind: http://valgrind.org +.. _valgrind: https://valgrind.org .. _`README.valgrind`: https://github.com/python/cpython/blob/master/Misc/README.valgrind .. _`valgrind-python.supp`: https://github.com/python/cpython/blob/master/Misc/valgrind-python.supp @@ -270,7 +290,7 @@ corresponding location in your .pyx source file. Hopefully the output will give you clues as to the source of your memory error. For more information on valgrind and the array of options it has, see the -tutorials and documentation on the `valgrind web site `_. +tutorials and documentation on the `valgrind web site `_. .. _arm64_dev_env: @@ -335,3 +355,19 @@ point. Then use pytest to run only the tests of the module you are interested in debugging. + +.. _meson_build_backend: + +The Meson Build Backend +======================= + +Since scikit-learn 1.5.0 we use meson-python as the build tool. Meson is +a new tool for scikit-learn and the PyData ecosystem. It is used by several +other packages that have written good guides about what it is and how it works. + +- `pandas setup doc + `_: + pandas has a similar setup as ours (no spin or dev.py) +- `scipy Meson doc + `_ gives + more background about how Meson works behind the scenes diff --git a/doc/developers/utilities.rst b/doc/developers/utilities.rst index 8b3612afda82a..2525b2b1365ed 100644 --- a/doc/developers/utilities.rst +++ b/doc/developers/utilities.rst @@ -97,7 +97,7 @@ Efficient Linear Algebra & Array Operations fast on large matrices on which you wish to extract only a small number of components. -- :func:`arrayfuncs.cholesky_delete`: +- `arrayfuncs.cholesky_delete`: (used in :func:`~sklearn.linear_model.lars_path`) Remove an item from a cholesky factorization. diff --git a/doc/documentation_team.rst b/doc/documentation_team.rst new file mode 100644 index 0000000000000..e7f13e5fe218f --- /dev/null +++ b/doc/documentation_team.rst @@ -0,0 +1,20 @@ +.. raw :: html + + +
+ [HTML card grid for the documentation team, listing Arturo Amor, Lucy Liu and Yao Xiao]
diff --git a/doc/faq.rst b/doc/faq.rst index 8ffe1a717a4cc..8ddf0c4c238f6 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -1,8 +1,8 @@ .. _faq: -=========================== +========================== Frequently Asked Questions -=========================== +========================== .. currentmodule:: sklearn @@ -40,21 +40,31 @@ Note however that this support is still considered experimental and specific components might behave slightly differently. Please refer to the test suite of the specific module of interest for more details. +How can I obtain permission to use the images in scikit-learn for my work? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The images contained in the `scikit-learn repository +`_ and the images generated within +the `scikit-learn documentation `_ +can be used via the `BSD 3-Clause License +`_ for +your work. Citations of scikit-learn are highly encouraged and appreciated. See +:ref:`citing scikit-learn `. Implementation decisions ------------------------ -Why is there no support for deep or reinforcement learning / Will there be support for deep or reinforcement learning in scikit-learn? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Why is there no support for deep or reinforcement learning? Will there be such support in the future? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Deep learning and reinforcement learning both require a rich vocabulary to define an architecture, with deep learning additionally requiring GPUs for efficient computing. However, neither of these fit within -the design constraints of scikit-learn; as a result, deep learning +the design constraints of scikit-learn. As a result, deep learning and reinforcement learning are currently out of scope for what scikit-learn seeks to achieve. -You can find more information about addition of gpu support at +You can find more information about the addition of GPU support at `Will you add GPU support?`_. Note that scikit-learn currently implements a simple multilayer perceptron @@ -62,7 +72,7 @@ in :mod:`sklearn.neural_network`. We will only accept bug fixes for this module. If you want to implement more complex deep learning models, please turn to popular deep learning frameworks such as `tensorflow `_, -`keras `_ +`keras `_, and `pytorch `_. .. _adding_graphical_models: @@ -85,12 +95,12 @@ do structured prediction: * `pystruct `_ handles general structured learning (focuses on SSVMs on arbitrary graph structures with approximate inference; defines the notion of sample as an instance of - the graph structure) + the graph structure). * `seqlearn `_ handles sequences only (focuses on exact inference; has HMMs, but mostly for the sake of completeness; treats a feature vector as a sample and uses an offset encoding - for the dependencies between feature vectors) + for the dependencies between feature vectors). Why did you remove HMMs from scikit-learn? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -100,26 +110,52 @@ See :ref:`adding_graphical_models`. Will you add GPU support? ^^^^^^^^^^^^^^^^^^^^^^^^^ -No, or at least not in the near future. The main reason is that GPU support -will introduce many software dependencies and introduce platform specific -issues. scikit-learn is designed to be easy to install on a wide variety of -platforms. 
Outside of neural networks, GPUs don't play a large role in machine -learning today, and much larger gains in speed can often be achieved by a -careful choice of algorithms. +Adding GPU support by default would introduce heavy hardware-specific software +dependencies and existing algorithms would need to be reimplemented. This would +make it both harder for the average user to install scikit-learn and harder for +the developers to maintain the code. + +However, since 2023, a limited but growing :ref:`list of scikit-learn +estimators ` can already run on GPUs if the input data is +provided as a PyTorch or CuPy array and if scikit-learn has been configured to +accept such inputs as explained in :ref:`array_api`. This Array API support +allows scikit-learn to run on GPUs without introducing heavy and +hardware-specific software dependencies to the main package. + +Most estimators that rely on NumPy for their computationally intensive operations +can be considered for Array API support and therefore GPU support. + +However, not all scikit-learn estimators are amenable to efficiently running +on GPUs via the Array API for fundamental algorithmic reasons. For instance, +tree-based models currently implemented with Cython in scikit-learn are +fundamentally not array-based algorithms. Other algorithms such as k-means or +k-nearest neighbors rely on array-based algorithms but are also implemented in +Cython. Cython is used to manually interleave consecutive array operations to +avoid introducing performance-killing memory accesses to large intermediate +arrays: this low-level algorithmic rewrite is called "kernel fusion" and cannot +be expressed via the Array API for the foreseeable future. + +Adding efficient GPU support to estimators that cannot be efficiently +implemented with the Array API would require designing and adopting a more +flexible extension system for scikit-learn. This possibility is being +considered in the following GitHub issue (under discussion): + +- https://github.com/scikit-learn/scikit-learn/issues/22438 + Why do categorical variables need preprocessing in scikit-learn, compared to other tools? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Most of scikit-learn assumes data is in NumPy arrays or SciPy sparse matrices of a single numeric dtype. These do not explicitly represent categorical -variables at present. Thus, unlike R's data.frames or pandas.DataFrame, we -require explicit conversion of categorical features to numeric values, as +variables at present. Thus, unlike R's ``data.frames`` or :class:`pandas.DataFrame`, +we require explicit conversion of categorical features to numeric values, as discussed in :ref:`preprocessing_categorical_features`. See also :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` for an example of working with heterogeneous (e.g. categorical and numeric) data. -Why does Scikit-learn not directly work with, for example, pandas.DataFrame? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Why does scikit-learn not directly work with, for example, :class:`pandas.DataFrame`? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The homogeneous NumPy and SciPy data objects currently expected are most efficient to process for most operations. Extensive work would also be needed @@ -130,7 +166,6 @@ data structures.
Note however that :class:`~sklearn.compose.ColumnTransformer` makes it convenient to handle heterogeneous pandas dataframes by mapping homogeneous subsets of dataframe columns selected by name or dtype to dedicated scikit-learn transformers. - Therefore :class:`~sklearn.compose.ColumnTransformer` are often used in the first step of scikit-learn pipelines when dealing with heterogeneous dataframes (see :ref:`pipeline` for more details). @@ -138,25 +173,22 @@ with heterogeneous dataframes (see :ref:`pipeline` for more details). See also :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` for an example of working with heterogeneous (e.g. categorical and numeric) data. -Do you plan to implement transform for target y in a pipeline? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Currently transform only works for features X in a pipeline. -There's a long-standing discussion about -not being able to transform y in a pipeline. -Follow on github issue -`#4143 `_. -Meanwhile check out +Do you plan to implement transform for target ``y`` in a pipeline? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Currently transform only works for features ``X`` in a pipeline. There's a +long-standing discussion about not being able to transform ``y`` in a pipeline. +Follow on GitHub issue :issue:`4143`. Meanwhile, you can check out :class:`~compose.TransformedTargetRegressor`, `pipegraph `_, -`imbalanced-learn `_. -Note that Scikit-learn solved for the case where y +and `imbalanced-learn `_. +Note that scikit-learn solved for the case where ``y`` has an invertible transformation applied before training -and inverted after prediction. Scikit-learn intends to solve for -use cases where y should be transformed at training time -and not at test time, for resampling and similar uses, -like at `imbalanced-learn`. +and inverted after prediction. scikit-learn intends to solve for +use cases where ``y`` should be transformed at training time +and not at test time, for resampling and similar uses, like at +`imbalanced-learn `_. In general, these use cases can be solved -with a custom meta estimator rather than a Pipeline +with a custom meta estimator rather than a :class:`~pipeline.Pipeline`. Why are there so many different estimators for linear models? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -174,16 +206,17 @@ each other. Let us have a look at - :class:`~linear_model.Ridge`, L2 penalty - :class:`~linear_model.Lasso`, L1 penalty (sparse models) - :class:`~linear_model.ElasticNet`, L1 + L2 penalty (less sparse models) -- :class:`~linear_model.SGDRegressor` with `loss='squared_loss'` +- :class:`~linear_model.SGDRegressor` with `loss="squared_loss"` **Maintainer perspective:** They all do in principle the same and are different only by the penalty they impose. This, however, has a large impact on the way the underlying optimization problem is solved. In the end, this amounts to usage of different -methods and tricks from linear algebra. A special case is `SGDRegressor` which +methods and tricks from linear algebra. A special case is +:class:`~linear_model.SGDRegressor` which comprises all 4 previous models and is different by the optimization procedure. A further side effect is that the different estimators favor different data -layouts (`X` c-contiguous or f-contiguous, sparse csr or csc). This complexity +layouts (`X` C-contiguous or F-contiguous, sparse csr or csc). 
This complexity of the seemingly simple linear models is the reason for having different estimator classes for different penalties. @@ -230,8 +263,8 @@ this reason. .. _new_algorithms_inclusion_criteria: -What are the inclusion criteria for new algorithms ? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +What are the inclusion criteria for new algorithms? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We only consider well-established algorithms for inclusion. A rule of thumb is at least 3 years since publication, 200+ citations, and wide use and @@ -256,8 +289,8 @@ Inclusion of a new algorithm speeding up an existing model is easier if: - it does not introduce new hyper-parameters (as it makes the library more future-proof), - it is easy to document clearly when the contribution improves the speed - and when it does not, for instance "when n_features >> - n_samples", + and when it does not, for instance, "when ``n_features >> + n_samples``", - benchmarks clearly show a speed up. Also, note that your implementation need not be in scikit-learn to be used @@ -282,7 +315,7 @@ at which point the original author might long have lost interest. See also :ref:`new_algorithms_inclusion_criteria`. For a great read about long-term maintenance issues in open-source software, look at `the Executive Summary of Roads and Bridges -`_ +`_. Using scikit-learn @@ -299,16 +332,14 @@ with the ``[scikit-learn]`` and ``[python]`` tags. You can alternatively use the Please make sure to include a minimal reproduction code snippet (ideally shorter than 10 lines) that highlights your problem on a toy dataset (for instance from -``sklearn.datasets`` or randomly generated with functions of ``numpy.random`` with +:mod:`sklearn.datasets` or randomly generated with functions of ``numpy.random`` with a fixed random seed). Please remove any line of code that is not necessary to reproduce your problem. The problem should be reproducible by simply copy-pasting your code snippet in a Python shell with scikit-learn installed. Do not forget to include the import statements. - More guidance to write good reproduction code snippets can be found at: - -https://stackoverflow.com/help/mcve +https://stackoverflow.com/help/mcve. If your problem raises an exception that you do not understand (even after googling it), please make sure to include the full traceback that you obtain when running the @@ -317,12 +348,9 @@ reproduction script. For bug reports or feature requests, please make use of the `issue tracker on GitHub `_. -There is also a `scikit-learn Gitter channel -`_ where some users and developers -might be found. - -**Please do not email any authors directly to ask for assistance, report bugs, -or for any other issue related to scikit-learn.** +.. warning:: + Please do not email any authors directly to ask for assistance, report bugs, + or for any other issue related to scikit-learn. How should I save, export or deploy estimators for production? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -336,15 +364,15 @@ Bunch objects are sometimes used as an output for functions and methods. They extend dictionaries by enabling values to be accessed by key, `bunch["value_key"]`, or by an attribute, `bunch.value_key`. -They should not be used as an input; therefore you almost never need to create -a ``Bunch`` object, unless you are extending the scikit-learn's API. +They should not be used as an input. 
Therefore you almost never need to create +a :class:`~utils.Bunch` object, unless you are extending scikit-learn's API. How can I load my own datasets into a format usable by scikit-learn? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Generally, scikit-learn works on any numeric data stored as numpy arrays or scipy sparse matrices. Other types that are convertible to numeric -arrays such as pandas DataFrame are also acceptable. +arrays such as :class:`pandas.DataFrame` are also acceptable. For more information on loading your data files into these usable data structures, please refer to :ref:`loading external datasets `. @@ -363,7 +391,7 @@ For more general feature extraction from any kind of data, see Another common case is when you have non-numerical data and a custom distance (or similarity) metric on these data. Examples include strings with edit -distance (aka. Levenshtein distance; e.g., DNA or RNA sequences). These can be +distance (aka. Levenshtein distance), for instance, DNA or RNA sequences. These can be encoded as numbers, but doing so is painful and error-prone. Working with distance metrics on arbitrary data can be done in two ways. @@ -371,15 +399,15 @@ Firstly, many estimators take precomputed distance/similarity matrices, so if the dataset is not too large, you can compute distances for all pairs of inputs. If the dataset is large, you can use feature vectors with only one "feature", which is an index into a separate data structure, and supply a custom metric -function that looks up the actual data in this data structure. E.g., to use -DBSCAN with Levenshtein distances:: +function that looks up the actual data in this data structure. For instance, to use +:class:`~cluster.dbscan` with Levenshtein distances:: - >>> from leven import levenshtein # doctest: +SKIP >>> import numpy as np + >>> from leven import levenshtein # doctest: +SKIP >>> from sklearn.cluster import dbscan >>> data = ["ACCTCCTAGAAG", "ACCTACTAGAAGTT", "GAATATTAGGCCGA"] >>> def lev_metric(x, y): - ... i, j = int(x[0]), int(y[0]) # extract indices + ... i, j = int(x[0]), int(y[0]) # extract indices ... return levenshtein(data[i], data[j]) ... >>> X = np.arange(len(data)).reshape(-1, 1) @@ -387,27 +415,26 @@ DBSCAN with Levenshtein distances:: array([[0], [1], [2]]) - >>> # We need to specify algoritum='brute' as the default assumes + >>> # We need to specify algorithm='brute' as the default assumes >>> # a continuous feature space. - >>> dbscan(X, metric=lev_metric, eps=5, min_samples=2, algorithm='brute') - ... # doctest: +SKIP - ([0, 1], array([ 0, 0, -1])) - -(This uses the third-party edit distance package ``leven``.) + >>> dbscan(X, metric=lev_metric, eps=5, min_samples=2, algorithm='brute') # doctest: +SKIP + (array([0, 1]), array([ 0, 0, -1])) -Similar tricks can be used, with some care, for tree kernels, graph kernels, -etc. +Note that the example above uses the third-party edit distance package +`leven `_. Similar tricks can be used, +with some care, for tree kernels, graph kernels, etc. -Why do I sometime get a crash/freeze with n_jobs > 1 under OSX or Linux? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Why do I sometimes get a crash/freeze with ``n_jobs > 1`` under OSX or Linux? 
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Several scikit-learn tools such as ``GridSearchCV`` and ``cross_val_score`` -rely internally on Python's `multiprocessing` module to parallelize execution +Several scikit-learn tools such as :class:`~model_selection.GridSearchCV` and +:class:`~model_selection.cross_val_score` rely internally on Python's +:mod:`multiprocessing` module to parallelize execution onto several Python processes by passing ``n_jobs > 1`` as an argument. -The problem is that Python ``multiprocessing`` does a ``fork`` system call +The problem is that Python :mod:`multiprocessing` does a ``fork`` system call without following it with an ``exec`` system call for performance reasons. Many -libraries like (some versions of) Accelerate / vecLib under OSX, (some versions +libraries like (some versions of) Accelerate or vecLib under OSX, (some versions of) MKL, the OpenMP runtime of GCC, nvidia's Cuda (and probably many others), manage their own internal thread pool. Upon a call to `fork`, the thread pool state in the child process is corrupted: the thread pool believes it has many @@ -418,30 +445,30 @@ main since 0.2.10) and we contributed a `patch `_ to GCC's OpenMP runtime (not yet reviewed). -But in the end the real culprit is Python's ``multiprocessing`` that does +But in the end the real culprit is Python's :mod:`multiprocessing` that does ``fork`` without ``exec`` to reduce the overhead of starting and using new Python processes for parallel computing. Unfortunately this is a violation of the POSIX standard and therefore some software editors like Apple refuse to -consider the lack of fork-safety in Accelerate / vecLib as a bug. +consider the lack of fork-safety in Accelerate and vecLib as a bug. -In Python 3.4+ it is now possible to configure ``multiprocessing`` to -use the 'forkserver' or 'spawn' start methods (instead of the default -'fork') to manage the process pools. To work around this issue when +In Python 3.4+ it is now possible to configure :mod:`multiprocessing` to +use the ``"forkserver"`` or ``"spawn"`` start methods (instead of the default +``"fork"``) to manage the process pools. To work around this issue when using scikit-learn, you can set the ``JOBLIB_START_METHOD`` environment -variable to 'forkserver'. However the user should be aware that using -the 'forkserver' method prevents joblib.Parallel to call function +variable to ``"forkserver"``. However the user should be aware that using +the ``"forkserver"`` method prevents :class:`joblib.Parallel` to call function interactively defined in a shell session. -If you have custom code that uses ``multiprocessing`` directly instead of using -it via joblib you can enable the 'forkserver' mode globally for your -program: Insert the following instructions in your main script:: +If you have custom code that uses :mod:`multiprocessing` directly instead of using +it via :mod:`joblib` you can enable the ``"forkserver"`` mode globally for your +program. Insert the following instructions in your main script:: import multiprocessing # other imports, custom code, load data, define model... - if __name__ == '__main__': - multiprocessing.set_start_method('forkserver') + if __name__ == "__main__": + multiprocessing.set_start_method("forkserver") # call scikit-learn utils with n_jobs > 1 here @@ -450,20 +477,20 @@ documentation `. +For more details, please refer to our :ref:`notes on parallelism `. How do I set a ``random_state`` for an entire execution? 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 298200f5a2afd..cd4d953db1b8a 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -37,8 +37,8 @@ The :term:`fit` method generally accepts 2 inputs: represented as rows and features are represented as columns. - The target values :term:`y` which are real numbers for regression tasks, or integers for classification (or any other discrete set of values). For - unsupervized learning tasks, ``y`` does not need to be specified. ``y`` is - usually 1d array where the ``i`` th entry corresponds to the target of the + unsupervised learning tasks, ``y`` does not need to be specified. ``y`` is + usually a 1d array where the ``i`` th entry corresponds to the target of the ``i`` th sample (row) of ``X``. Both ``X`` and ``y`` are usually expected to be numpy arrays or equivalent diff --git a/doc/glossary.rst b/doc/glossary.rst index 07f844619cc54..84a628b0f716d 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -66,6 +66,7 @@ General Concepts It excludes: * a :term:`sparse matrix` + * a sparse array * an iterator * a generator @@ -205,6 +206,29 @@ General Concepts exceptional behaviours on the estimator using semantic :term:`estimator tags`. + cross-fitting + cross fitting + A resampling method that iteratively partitions data into mutually + exclusive subsets to fit two stages. During the first stage, the + mutually exclusive subsets enable predictions or transformations to be + computed on data not seen during training. The computed data is then + used in the second stage. The objective is to avoid having any + overfitting in the first stage introduce bias into the input data + distribution of the second stage. + For examples of its use, see: :class:`~preprocessing.TargetEncoder`, + :class:`~ensemble.StackingClassifier`, + :class:`~ensemble.StackingRegressor` and + :class:`~calibration.CalibratedClassifierCV`. + + cross-validation + cross validation + A resampling method that iteratively partitions data into mutually + exclusive 'train' and 'test' subsets so model performance can be + evaluated on unseen data. This conserves data as avoids the need to hold + out a 'validation' dataset and accounts for variability as multiple + rounds of cross validation are generally performed. + See :ref:`User Guide ` for more details. + deprecation We use deprecation to slowly violate our :term:`backwards compatibility` assurances, usually to: @@ -262,7 +286,26 @@ General Concepts Note that in this case, the precision can be platform dependent. The `numeric` dtype refers to accepting both `integer` and `floating`. - TODO: Mention efficiency and precision issues; casting policy. + When it comes to choosing between 64-bit dtype (i.e. `np.float64` and + `np.int64`) and 32-bit dtype (i.e. `np.float32` and `np.int32`), it + boils down to a trade-off between efficiency and precision. The 64-bit + types offer more accurate results due to their lower floating-point + error, but demand more computational resources, resulting in slower + operations and increased memory usage. In contrast, 32-bit types + promise enhanced operation speed and reduced memory consumption, but + introduce a larger floating-point error. The efficiency improvement are + dependent on lower level optimization such as like vectorization, + single instruction multiple dispatch (SIMD), or cache optimization but + crucially on the compatibility of the algorithm in use. 
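As a rough sketch of the memory half of this trade-off (an illustrative
snippet added here, not taken from the glossary text itself), casting an
array to `np.float32` halves its footprint::

    import numpy as np

    X64 = np.random.rand(100_000, 50)   # float64 by default
    X32 = X64.astype(np.float32)        # same values, half the memory
    print(X64.nbytes // X32.nbytes)     # 2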
+ + Specifically, the choice of precision should account for whether the + employed algorithm can effectively leverage `np.float32`. Some + algorithms, especially certain minimization methods, are exclusively + coded for `np.float64`, meaning that even if `np.float32` is passed, it + triggers an automatic conversion back to `np.float64`. This not only + negates the intended computational savings but also introduces + additional overhead, making operations with `np.float32` unexpectedly + slower and more memory-intensive due to this extra conversion step. duck typing We try to apply `duck typing @@ -344,8 +387,8 @@ General Concepts evaluation metric evaluation metrics Evaluation metrics give a measure of how well a model performs. We may - use this term specifically to refer to the functions in :mod:`metrics` - (disregarding :mod:`metrics.pairwise`), as distinct from the + use this term specifically to refer to the functions in :mod:`~sklearn.metrics` + (disregarding :mod:`~sklearn.metrics.pairwise`), as distinct from the :term:`score` method and the :term:`scoring` API used in cross validation. See :ref:`model_evaluation`. @@ -360,7 +403,7 @@ General Concepts the scoring API. Note that some estimators can calculate metrics that are not included - in :mod:`metrics` and are estimator-specific, notably model + in :mod:`~sklearn.metrics` and are estimator-specific, notably model likelihoods. estimator tags @@ -494,8 +537,8 @@ General Concepts applying a :term:`transformer` to the entirety of a dataset rather than each training portion in a cross validation split. - We aim to provide interfaces (such as :mod:`pipeline` and - :mod:`model_selection`) that shield the user from data leakage. + We aim to provide interfaces (such as :mod:`~sklearn.pipeline` and + :mod:`~sklearn.model_selection`) that shield the user from data leakage. memmapping memory map @@ -575,7 +618,7 @@ General Concepts params We mostly use *parameter* to refer to the aspects of an estimator that can be specified in its construction. For example, ``max_depth`` and - ``random_state`` are parameters of :class:`RandomForestClassifier`. + ``random_state`` are parameters of :class:`~ensemble.RandomForestClassifier`. Parameters to an estimator's constructor are stored unmodified as attributes on the estimator instance, and conventionally start with an alphabetic character and end with an alphanumeric character. Each @@ -620,7 +663,7 @@ General Concepts implementations of distance metrics (as well as improper metrics like Cosine Distance) through :func:`metrics.pairwise_distances`, and of kernel functions (a constrained class of similarity functions) in - :func:`metrics.pairwise_kernels`. These can compute pairwise distance + :func:`metrics.pairwise.pairwise_kernels`. These can compute pairwise distance matrices that are symmetric and hence store data redundantly. See also :term:`precomputed` and :term:`metric`. @@ -748,6 +791,15 @@ General Concepts possible (i.e. if an estimator does not / cannot support sparse matrices). + stateless + An estimator is stateless if it does not store any information that is + obtained during :term:`fit`. This information can be either parameters + learned during :term:`fit` or statistics computed from the + training data. An estimator is stateless if it has no :term:`attributes` + apart from ones set in `__init__`. Calling :term:`fit` for these + estimators will only validate the public :term:`attributes` passed + in `__init__`. 
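As an illustrative (editor-added) sketch of this distinction, a stateless
transformer such as :class:`~preprocessing.FunctionTransformer` learns nothing
during ``fit``, whereas a stateful one such as
:class:`~preprocessing.StandardScaler` stores statistics from the training
data::

    import numpy as np
    from sklearn.preprocessing import FunctionTransformer, StandardScaler

    X = np.arange(6.0).reshape(3, 2)

    # Stateful: fit() stores per-feature statistics learnt from the data.
    scaler = StandardScaler().fit(X)
    print(scaler.mean_, scaler.scale_)

    # Stateless: fit() only validates the input; transform() merely applies
    # the supplied function, independent of anything seen during fit().
    log_tf = FunctionTransformer(np.log1p).fit(X)
    print(log_tf.transform(X))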
+ supervised supervised learning Learning where the expected prediction (label or ground truth) is @@ -1017,6 +1069,38 @@ Further examples: * :class:`gaussian_process.kernels.Kernel` * ``tree.Criterion`` +.. _glossary_metadata_routing: + +Metadata Routing +================ + +.. glossary:: + + consumer + An object which consumes :term:`metadata`. This object is usually an + :term:`estimator`, a :term:`scorer`, or a :term:`CV splitter`. Consuming + metadata means using it in calculations, e.g. using + :term:`sample_weight` to calculate a certain type of score. Being a + consumer doesn't mean that the object always receives a certain + metadata, rather it means it can use it if it is provided. + + metadata + Data which is related to the given :term:`X` and :term:`y` data, but + is not directly a part of the data, e.g. :term:`sample_weight` or + :term:`groups`, and is passed along to different objects and methods, + e.g. to a :term:`scorer` or a :term:`CV splitter`. + + router + An object which routes metadata to :term:`consumers `. This + object is usually a :term:`meta-estimator`, e.g. + :class:`~pipeline.Pipeline` or :class:`~model_selection.GridSearchCV`. + Some routers can also be a consumer. This happens for example when a + meta-estimator uses the given :term:`groups`, and it also passes it + along to some of its sub-objects, such as a :term:`CV splitter`. + +Please refer to :ref:`Metadata Routing User Guide ` for more +information. + .. _glossary_target_types: Target Types @@ -1113,7 +1197,7 @@ Target Types XXX: For simplicity, we may not always support string class labels for multiclass multioutput, and integer class labels should be used. - :mod:`multioutput` provides estimators which estimate multi-output + :mod:`~sklearn.multioutput` provides estimators which estimate multi-output problems using multiple single-output estimators. This may not fully account for dependencies among the different outputs, which methods natively handling the multioutput case (e.g. decision trees, nearest @@ -1465,7 +1549,7 @@ functions or non-estimator constructors. 1: 1}, {0: 1, 1: 1}]`` instead of ``[{1:1}, {2:5}, {3:1}, {4:1}]``. The ``class_weight`` parameter is validated and interpreted with - :func:`utils.compute_class_weight`. + :func:`utils.class_weight.compute_class_weight`. ``cv`` Determines a cross validation splitting strategy, as used in @@ -1491,16 +1575,17 @@ functions or non-estimator constructors. With some exceptions (especially where not using cross validation at all is an option), the default is 5-fold. - ``cv`` values are validated and interpreted with :func:`utils.check_cv`. + ``cv`` values are validated and interpreted with + :func:`model_selection.check_cv`. ``kernel`` Specifies the kernel function to be used by Kernel Method algorithms. - For example, the estimators :class:`SVC` and - :class:`GaussianProcessClassifier` both have a ``kernel`` parameter - that takes the name of the kernel to use as string or a callable - kernel function used to compute the kernel matrix. For more reference, - see the :ref:`kernel_approximation` and the :ref:`gaussian_process` - user guides. + For example, the estimators :class:`svm.SVC` and + :class:`gaussian_process.GaussianProcessClassifier` both have a + ``kernel`` parameter that takes the name of the kernel to use as string + or a callable kernel function used to compute the kernel matrix. For + more reference, see the :ref:`kernel_approximation` and the + :ref:`gaussian_process` user guides. 
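A minimal illustrative sketch (not part of the original entry) of the two ways
of specifying ``kernel``, either by name or as a callable that returns the
kernel matrix::

    from sklearn.datasets import load_iris
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)

    # Kernel given by name.
    clf_rbf = SVC(kernel="rbf").fit(X, y)

    # Kernel given as a callable computing the Gram matrix between A and B.
    def linear_kernel(A, B):
        return A @ B.T

    clf_custom = SVC(kernel=linear_kernel).fit(X, y)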
``max_iter`` For estimators involving iterative optimization, this determines the @@ -1657,10 +1742,24 @@ functions or non-estimator constructors. in a subsequent call to :term:`fit`. Note that this is only applicable for some models and some - parameters, and even some orders of parameter values. For example, - ``warm_start`` may be used when building random forests to add more - trees to the forest (increasing ``n_estimators``) but not to reduce - their number. + parameters, and even some orders of parameter values. In general, there + is an interaction between ``warm_start`` and the parameter controlling + the number of iterations of the estimator. + + For estimators imported from :mod:`~sklearn.ensemble`, + ``warm_start`` will interact with ``n_estimators`` or ``max_iter``. + For these models, the number of iterations, reported via + ``len(estimators_)`` or ``n_iter_``, corresponds the total number of + estimators/iterations learnt since the initialization of the model. + Thus, if a model was already initialized with `N` estimators, and `fit` + is called with ``n_estimators`` or ``max_iter`` set to `M`, the model + will train `M - N` new estimators. + + Other models, usually using gradient-based solvers, have a different + behavior. They all expose a ``max_iter`` parameter. The reported + ``n_iter_`` corresponds to the number of iteration done during the last + call to ``fit`` and will be at most ``max_iter``. Thus, we do not + consider the state of the estimator since the initialization. :term:`partial_fit` also retains the model between calls, but differs: with ``warm_start`` the parameters change and the data is diff --git a/doc/governance.rst b/doc/governance.rst index a6db1f6bf769c..d6b07afe4eeb4 100644 --- a/doc/governance.rst +++ b/doc/governance.rst @@ -20,95 +20,103 @@ the project community. Roles And Responsibilities ========================== +We distinguish between contributors, core contributors, and the technical +committee. A key distinction between them is their voting rights: contributors +have no voting rights, whereas the other two groups all have voting rights, +as well as permissions to the tools relevant to their roles. + Contributors ------------ Contributors are community members who contribute in concrete ways to the project. Anyone can become a contributor, and contributions can take many forms – not only code – as detailed in the :ref:`contributors guide `. +There is no process to become a contributor: once somebody contributes to the +project in any way, they are a contributor. -Contributor Experience Team ---------------------------- - -The contributor experience team is composed of community members who have permission on -github to label and close issues. :ref:`Their work ` is -crucial to improve the communication in the project and limit the crowding -of the issue tracker. - -Similarly to what has been decided in the `python project -`_, -any contributor may become a member of the scikit-learn contributor experience team, -after showing some continuity in participating to scikit-learn -development (with pull requests and reviews). -Any core developer or member of the contributor experience team is welcome to propose a -scikit-learn contributor to join the contributor experience team. Other core developers -are then consulted: while it is expected that most acceptances will be -unanimous, a two-thirds majority is enough. -Every new member of the contributor experience team will be announced in the mailing -list. 
Members of the team are welcome to participate in `monthly core developer meetings -`_. - -.. _communication_team: - -Communication team -------------------- +Core Contributors +----------------- -Members of the communication team help with outreach and communication -for scikit-learn. The goal of the team is to develop public awareness of -scikit-learn, of its features and usage, as well as branding. +All core contributor members have the same voting rights and right to propose +new members to any of the roles listed below. Their membership is represented +as being an organization member on the scikit-learn `GitHub organization +`_. -For this, they can operate the scikit-learn accounts on various social -networks and produce materials. - -Every new communicator will be announced in the mailing list. -Communicators are welcome to participate in `monthly core developer meetings +They are also welcome to join our `monthly core contributor meetings `_. -Core developers ---------------- - -Core developers are community members who have shown that they are dedicated to -the continued development of the project through ongoing engagement with the -community. They have shown they can be trusted to maintain scikit-learn with -care. Being a core developer allows contributors to more easily carry on -with their project related activities by giving them direct access to the -project's repository and is represented as being an organization member on the -scikit-learn `GitHub organization `_. -Core developers are expected to review code -contributions, can merge approved pull requests, can cast votes for and against -merging a pull-request, and can be involved in deciding major changes to the -API. - -New core developers can be nominated by any existing core developers. Once they -have been nominated, there will be a vote by the current core developers. -Voting on new core developers is one of the few activities that takes place on -the project's private management list. While it is expected that most votes -will be unanimous, a two-thirds majority of the cast votes is enough. The vote -needs to be open for at least 1 week. - -Core developers that have not contributed to the project (commits or GitHub -comments) in the past 12 months will be asked if they want to become emeritus -core developers and recant their commit and voting rights until they become -active again. The list of core developers, active and emeritus (with dates at -which they became active) is public on the scikit-learn website. +New members can be nominated by any existing member. Once they have been +nominated, there will be a vote by the current core contributors. Voting on new +members is one of the few activities that takes place on the project's private +mailing list. While it is expected that most votes will be unanimous, a +two-thirds majority of the cast votes is enough. The vote needs to be open for +at least 1 week. + +Core contributors that have not contributed to the project, corresponding to +their role, in the past 12 months will be asked if they want to become emeritus +members and recant their rights until they become active again. The list of +members, active and emeritus (with dates at which they became active) is public +on the scikit-learn website. 
+ +The following teams form the core contributors group: + +* **Contributor Experience Team** + The contributor experience team improves the experience of contributors by + helping with the triage of issues and pull requests, as well as noticing any + repeating patterns where people might struggle, and to help with improving + those aspects of the project. + + To this end, they have the required permissions on github to label and close + issues. :ref:`Their work ` is crucial to improve the + communication in the project and limit the crowding of the issue tracker. + + .. _communication_team: + +* **Communication Team** + Members of the communication team help with outreach and communication + for scikit-learn. The goal of the team is to develop public awareness of + scikit-learn, of its features and usage, as well as branding. + + For this, they can operate the scikit-learn accounts on various social networks + and produce materials. They also have the required rights to our blog + repository and other relevant accounts and platforms. + +* **Documentation Team** + Members of the documentation team engage with the documentation of the project + among other things. They might also be involved in other aspects of the + project, but their reviews on documentation contributions are considered + authoritative, and can merge such contributions. + + To this end, they have permissions to merge pull requests in scikit-learn's + repository. + +* **Maintainers Team** + Maintainers are community members who have shown that they are dedicated to the + continued development of the project through ongoing engagement with the + community. They have shown they can be trusted to maintain scikit-learn with + care. Being a maintainer allows contributors to more easily carry on with their + project related activities by giving them direct access to the project's + repository. Maintainers are expected to review code contributions, merge + approved pull requests, cast votes for and against merging a pull-request, + and to be involved in deciding major changes to the API. Technical Committee ------------------- -The Technical Committee (TC) members are core developers who have additional -responsibilities to ensure the smooth running of the project. TC members are expected to -participate in strategic planning, and approve changes to the governance model. -The purpose of the TC is to ensure a smooth progress from the big-picture -perspective. Indeed changes that impact the full project require a synthetic -analysis and a consensus that is both explicit and informed. In cases that the -core developer community (which includes the TC members) fails to reach such a -consensus in the required time frame, the TC is the entity to resolve the -issue. -Membership of the TC is by nomination by a core developer. A nomination will -result in discussion which cannot take more than a month and then a vote by -the core developers which will stay open for a week. TC membership votes are -subject to a two-third majority of all cast votes as well as a simple majority -approval of all the current TC members. TC members who do not actively engage -with the TC duties are expected to resign. + +The Technical Committee (TC) members are maintainers who have additional +responsibilities to ensure the smooth running of the project. TC members are +expected to participate in strategic planning, and approve changes to the +governance model. The purpose of the TC is to ensure a smooth progress from the +big-picture perspective. 
Indeed changes that impact the full project require a +synthetic analysis and a consensus that is both explicit and informed. In cases +that the core contributor community (which includes the TC members) fails to +reach such a consensus in the required time frame, the TC is the entity to +resolve the issue. Membership of the TC is by nomination by a core contributor. +A nomination will result in discussion which cannot take more than a month and +then a vote by the core contributors which will stay open for a week. TC +membership votes are subject to a two-third majority of all cast votes as well +as a simple majority approval of all the current TC members. TC members who do +not actively engage with the TC duties are expected to resign. The Technical Committee of scikit-learn consists of :user:`Thomas Fan `, :user:`Alexandre Gramfort `, :user:`Olivier Grisel @@ -125,39 +133,55 @@ and the `issue tracker `_. Occasionally, sensitive discussion occurs on a private list. Scikit-learn uses a "consensus seeking" process for making decisions. The group -tries to find a resolution that has no open objections among core developers. -At any point during the discussion, any core-developer can call for a vote, which will -conclude one month from the call for the vote. Any vote must be backed by a -:ref:`SLEP `. If no option can gather two thirds of the votes cast, the -decision is escalated to the TC, which in turn will use consensus seeking with -the fallback option of a simple majority vote if no consensus can be found -within a month. This is what we hereafter may refer to as “the decision making -process”. - -Decisions (in addition to adding core developers and TC membership as above) +tries to find a resolution that has no open objections among core contributors. +At any point during the discussion, any core contributor can call for a vote, +which will conclude one month from the call for the vote. Most votes have to be +backed by a :ref:`SLEP `. If no option can gather two thirds of the votes +cast, the decision is escalated to the TC, which in turn will use consensus +seeking with the fallback option of a simple majority vote if no consensus can +be found within a month. This is what we hereafter may refer to as "**the +decision making process**". + +Decisions (in addition to adding core contributors and TC membership as above) are made according to the following rules: -* **Minor Documentation changes**, such as typo fixes, or addition / correction of a - sentence, but no change of the scikit-learn.org landing page or the “about” - page: Requires +1 by a core developer, no -1 by a core developer (lazy - consensus), happens on the issue or pull request page. Core developers are - expected to give “reasonable time” to others to give their opinion on the pull - request if they're not confident others would agree. +* **Minor Documentation changes**, such as typo fixes, or addition / correction + of a sentence, but no change of the ``scikit-learn.org`` landing page or the + “about” page: Requires +1 by a maintainer, no -1 by a maintainer (lazy + consensus), happens on the issue or pull request page. Maintainers are + expected to give “reasonable time” to others to give their opinion on the + pull request if they're not confident others would agree. * **Code changes and major documentation changes** - require +1 by two core developers, no -1 by a core developer (lazy + require +1 by two maintainers, no -1 by a maintainer (lazy consensus), happens on the issue of pull-request page. 
* **Changes to the API principles and changes to dependencies or supported - versions** happen via a :ref:`slep` and follows the decision-making process outlined above. - -* **Changes to the governance model** use the same decision process outlined above. + versions** happen via a :ref:`slep` and follows the decision-making process + outlined above. +* **Changes to the governance model** follow the process outlined in `SLEP020 + `__. If a veto -1 vote is cast on a lazy consensus, the proposer can appeal to the -community and core developers and the change can be approved or rejected using +community and maintainers and the change can be approved or rejected using the decision making procedure outlined above. +Governance Model Changes +------------------------ + +Governance model changes occur through an enhancement proposal or a GitHub Pull +Request. An enhancement proposal will go through "**the decision-making process**" +described in the previous section. Alternatively, an author may propose a change +directly to the governance model with a GitHub Pull Request. Logistically, an +author can open a Draft Pull Request for feedback and follow up with a new +revised Pull Request for voting. Once that author is happy with the state of the +Pull Request, they can call for a vote on the public mailing list. During the +one-month voting period, the Pull Request can not change. A Pull Request +Approval will count as a positive vote, and a "Request Changes" review will +count as a negative vote. If two-thirds of the cast votes are positive, then +the governance model change is accepted. + .. _slep: Enhancement proposals (SLEPs) @@ -165,6 +189,10 @@ Enhancement proposals (SLEPs) For all votes, a proposal must have been made public and discussed before the vote. Such proposal must be a consolidated document, in the form of a "Scikit-Learn Enhancement Proposal" (SLEP), rather than a long discussion on an -issue. A SLEP must be submitted as a pull-request to -`enhancement proposals `_ -using the `SLEP template `_. +issue. A SLEP must be submitted as a pull-request to `enhancement proposals +`_ using the `SLEP +template +`_. +`SLEP000 +`__ +describes the process in more detail. 
diff --git a/doc/images/Tidelift-logo-on-light.svg b/doc/images/Tidelift-logo-on-light.svg new file mode 100644 index 0000000000000..af12d68417235 --- /dev/null +++ b/doc/images/Tidelift-logo-on-light.svg @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + diff --git a/doc/images/bcg-small.png b/doc/images/bcg-small.png deleted file mode 100644 index 8ff377969003a..0000000000000 Binary files a/doc/images/bcg-small.png and /dev/null differ diff --git a/doc/images/beta_divergence.png b/doc/images/beta_divergence.png new file mode 100644 index 0000000000000..38e06524d1707 Binary files /dev/null and b/doc/images/beta_divergence.png differ diff --git a/doc/images/chanel-small.png b/doc/images/chanel-small.png new file mode 100644 index 0000000000000..b1965b714a42f Binary files /dev/null and b/doc/images/chanel-small.png differ diff --git a/doc/images/chanel.png b/doc/images/chanel.png new file mode 100644 index 0000000000000..1b2d39fd4facf Binary files /dev/null and b/doc/images/chanel.png differ diff --git a/doc/images/fujitsu-small.png b/doc/images/fujitsu-small.png deleted file mode 100644 index b77447117497d..0000000000000 Binary files a/doc/images/fujitsu-small.png and /dev/null differ diff --git a/doc/images/permuted_non_predictive_feature.png b/doc/images/permuted_non_predictive_feature.png new file mode 100644 index 0000000000000..3ba908cbfbe83 Binary files /dev/null and b/doc/images/permuted_non_predictive_feature.png differ diff --git a/doc/images/permuted_predictive_feature.png b/doc/images/permuted_predictive_feature.png new file mode 100644 index 0000000000000..702c698425618 Binary files /dev/null and b/doc/images/permuted_predictive_feature.png differ diff --git a/doc/images/probabl.png b/doc/images/probabl.png new file mode 100644 index 0000000000000..aab532ba62d95 Binary files /dev/null and b/doc/images/probabl.png differ diff --git a/doc/images/target_encoder_cross_validation.svg b/doc/images/target_encoder_cross_validation.svg new file mode 100644 index 0000000000000..769d5a8affb2e --- /dev/null +++ b/doc/images/target_encoder_cross_validation.svg @@ -0,0 +1,3 @@ + + +
[SVG text content: a five-fold cross-fitting diagram for the target encoder, with rows labelled Fold1 through Fold5, the remaining splits in each row labelled "Train", and the per-fold results merged in a final "Combine Folds" step.]
diff --git a/doc/install.rst b/doc/install.rst index faae9fccb60f3..89851171f4588 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -61,7 +61,7 @@ Installing the latest release >Install python3 and python3-pip using the package manager of the Linux Distribution.Install conda using the Anaconda or miniconda - installers or the miniforge installers + installers or the miniforge installers (no administrator permission required for any of those). @@ -69,42 +69,65 @@ Then run: .. raw:: html -
python3 -m venv sklearn-venvpython -m venv sklearn-venvpython -m venv sklearn-venvsource sklearn-venv/bin/activatesource sklearn-venv/bin/activatesklearn-venv\Scripts\activatepip install -U scikit-learnpip install -U scikit-learnpip install -U scikit-learnpip3 install -U scikit-learnconda create -n sklearn-env -c conda-forge scikit-learnconda activate sklearn-env
+
+
pip3 install -U scikit-learn
+ +
pip install -U scikit-learn
+ +
pip install -U scikit-learn
+ +
python3 -m venv sklearn-venv
+  source sklearn-venv/bin/activate
+  pip3 install -U scikit-learn
+ +
python -m venv sklearn-venv
+  sklearn-venv\Scripts\activate
+  pip install -U scikit-learn
+ +
python -m venv sklearn-venv
+  source sklearn-venv/bin/activate
+  pip install -U scikit-learn
+ +
conda create -n sklearn-env -c conda-forge scikit-learn
+  conda activate sklearn-env
+
In order to check your installation you can use .. raw:: html -
python3 -m pip show scikit-learn  # to see which version and where scikit-learn is installedpython3 -m pip freeze  # to see all packages installed in the active virtualenvpython3 -c "import sklearn; sklearn.show_versions()"python -m pip show scikit-learn  # to see which version and where scikit-learn is installedpython -m pip freeze  # to see all packages installed in the active virtualenvpython -c "import sklearn; sklearn.show_versions()"python -m pip show scikit-learn  # to see which version and where scikit-learn is installedpython -m pip freeze  # to see all packages installed in the active virtualenvpython -c "import sklearn; sklearn.show_versions()"python -m pip show scikit-learn  # to see which version and where scikit-learn is installedpython -m pip freeze  # to see all packages installed in the active virtualenvpython -c "import sklearn; sklearn.show_versions()"conda list scikit-learn  # to see which scikit-learn version is installedconda list  # to see all packages installed in the active conda environmentpython -c "import sklearn; sklearn.show_versions()"
+
+
python3 -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python3 -m pip freeze  # to see all packages installed in the active virtualenv
+  python3 -c "import sklearn; sklearn.show_versions()"
+ +
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python -m pip freeze  # to see all packages installed in the active virtualenv
+  python -c "import sklearn; sklearn.show_versions()"
+ +
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python -m pip freeze  # to see all packages installed in the active virtualenv
+  python -c "import sklearn; sklearn.show_versions()"
+ +
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python -m pip freeze  # to see all packages installed in the active virtualenv
+  python -c "import sklearn; sklearn.show_versions()"
+ +
conda list scikit-learn  # to see which scikit-learn version is installed
+  conda list  # to see all packages installed in the active conda environment
+  python -c "import sklearn; sklearn.show_versions()"
Note that in order to avoid potential conflicts with other packages it is @@ -143,34 +166,8 @@ purpose. Scikit-learn 0.22 supported Python 3.5-3.8. Scikit-learn 0.23 - 0.24 require Python 3.6 or newer. Scikit-learn 1.0 supported Python 3.7-3.10. - Scikit-learn 1.1 and later requires Python 3.8 or newer. - - -.. note:: - - For installing on PyPy, PyPy3-v5.10+, Numpy 1.14.0+, and scipy 1.1.0+ - are required. - -.. _install_on_apple_silicon_m1: - -Installing on Apple Silicon M1 hardware -======================================= - -The recently introduced `macos/arm64` platform (sometimes also known as -`macos/aarch64`) requires the open source community to upgrade the build -configuration and automation to properly support it. - -At the time of writing (January 2021), the only way to get a working -installation of scikit-learn on this hardware is to install scikit-learn and its -dependencies from the conda-forge distribution, for instance using the miniforge -installers: - -https://github.com/conda-forge/miniforge - -The following issue tracks progress on making it possible to install -scikit-learn from PyPI with pip: - -https://github.com/scikit-learn/scikit-learn/issues/19137 + Scikit-learn 1.1, 1.2 and 1.3 support Python 3.8-3.12 + Scikit-learn 1.4 requires Python 3.9 or newer. .. _install_by_distribution: @@ -220,8 +217,8 @@ Debian/Ubuntu The Debian/Ubuntu package is split in three different packages called ``python3-sklearn`` (python modules), ``python3-sklearn-lib`` (low-level implementations and bindings), ``python3-sklearn-doc`` (documentation). -Only the Python 3 version is available in the Debian Buster (the more recent -Debian distribution). +Note that scikit-learn requires Python 3, hence the need to use the `python3-` +suffixed package names. Packages can be installed using ``apt-get``: .. prompt:: bash $ @@ -233,7 +230,7 @@ Fedora ------ The Fedora package is called ``python3-scikit-learn`` for the python 3 version, -the only one available in Fedora30. +the only one available in Fedora. It can be installed using ``dnf``: .. prompt:: bash $ @@ -247,7 +244,7 @@ NetBSD scikit-learn is available via `pkgsrc-wip `_: - http://pkgsrc.se/math/py-scikit-learn + https://pkgsrc.se/math/py-scikit-learn MacPorts for Mac OSX @@ -274,26 +271,35 @@ python library for Windows, Mac OSX and Linux. Anaconda offers scikit-learn as part of its free distribution. -Intel conda channel -------------------- +Intel Extension for Scikit-learn +-------------------------------- -Intel maintains a dedicated conda channel that ships scikit-learn: +Intel maintains an optimized x86_64 package, available in PyPI (via `pip`), +and in the `main`, `conda-forge` and `intel` conda channels: .. prompt:: bash $ - conda install -c intel scikit-learn + conda install scikit-learn-intelex -This version of scikit-learn comes with alternative solvers for some common -estimators. Those solvers come from the DAAL C++ library and are optimized for -multi-core Intel CPUs. +This package has an Intel optimized version of many estimators. Whenever +an alternative implementation doesn't exist, scikit-learn implementation +is used as a fallback. Those optimized solvers come from the oneDAL +C++ library and are optimized for the x86_64 architecture, and are +optimized for multi-core Intel CPUs. Note that those solvers are not enabled by default, please refer to the -`daal4py `_ documentation -for more details. +`scikit-learn-intelex `_ +documentation for more details on usage scenarios. Direct export example: + +.. 
prompt:: python >>> + + from sklearnex.neighbors import NearestNeighbors Compatibility with the standard scikit-learn solvers is checked by running the full scikit-learn test suite via automated continuous integration as reported -on https://github.com/IntelPython/daal4py. +on https://github.com/intel/scikit-learn-intelex. If you observe any issue +with `scikit-learn-intelex`, please report the issue on their +`issue tracker `__. WinPython for Windows @@ -319,7 +325,7 @@ size limit of Windows if Python is installed in a nested location such as the Collecting scikit-learn ... Installing collected packages: scikit-learn - ERROR: Could not install packages due to an EnvironmentError: [Errno 2] No such file or directory: 'C:\\Users\\username\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python37\\site-packages\\sklearn\\datasets\\tests\\data\\openml\\292\\api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz' + ERROR: Could not install packages due to an OSError: [Errno 2] No such file or directory: 'C:\\Users\\username\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python37\\site-packages\\sklearn\\datasets\\tests\\data\\openml\\292\\api-v1-json-data-list-data_name-australian-limit-2-data_version-1-status-deactivated.json.gz' In this case it is possible to lift that limit in the Windows registry by using the ``regedit`` tool: @@ -335,6 +341,6 @@ using the ``regedit`` tool: #. Reinstall scikit-learn (ignoring the previous broken installation): -.. prompt:: python $ +.. prompt:: bash $ pip install --exists-action=i scikit-learn diff --git a/doc/jupyter-lite.json b/doc/jupyter-lite.json new file mode 100644 index 0000000000000..e582ad81eb541 --- /dev/null +++ b/doc/jupyter-lite.json @@ -0,0 +1,10 @@ +{ + "jupyter-lite-schema-version": 0, + "jupyter-config-data": { + "litePluginSettings": { + "@jupyterlite/pyodide-kernel-extension:kernel": { + "pyodideUrl": "https://cdn.jsdelivr.net/pyodide/v0.25.0/full/pyodide.js" + } + } + } +} diff --git a/doc/jupyter_lite_config.json b/doc/jupyter_lite_config.json new file mode 100644 index 0000000000000..6b25be20912a8 --- /dev/null +++ b/doc/jupyter_lite_config.json @@ -0,0 +1,5 @@ +{ + "LiteBuildConfig": { + "no_sourcemaps": true + } +} diff --git a/doc/authors.rst b/doc/maintainers.rst similarity index 89% rename from doc/authors.rst rename to doc/maintainers.rst index ae8e39cbaa549..0ba69d8afa60d 100644 --- a/doc/authors.rst +++ b/doc/maintainers.rst @@ -34,6 +34,10 @@
[maintainers grid (raw HTML): existing entries for Yaroslav Halchenko, Nicolas Hug, Hanmin Qin, Bertrand Thirion, Nelle Varoquaux and Roman Yurchak, with new entries added for Tim Head, Omar Salman and Yao Xiao.]
diff --git a/doc/authors_emeritus.rst b/doc/maintainers_emeritus.rst similarity index 94% rename from doc/authors_emeritus.rst rename to doc/maintainers_emeritus.rst index 21ce287541f8b..b979b77bba974 100644 --- a/doc/authors_emeritus.rst +++ b/doc/maintainers_emeritus.rst @@ -27,7 +27,7 @@ - Peter Prettenhofer - (Venkat) Raghav, Rajagopalan - Jacob Schreiber -- Du Shiqiao +- 杜世橋 Du Shiqiao - Jake Vanderplas - David Warde-Farley -- Ron Weiss \ No newline at end of file +- Ron Weiss diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst new file mode 100644 index 0000000000000..d319b311dddd7 --- /dev/null +++ b/doc/metadata_routing.rst @@ -0,0 +1,329 @@ +.. currentmodule:: sklearn + +.. TODO: update doc/conftest.py once document is updated and examples run. + +.. _metadata_routing: + +Metadata Routing +================ + +.. note:: + The Metadata Routing API is experimental, and is not yet implemented for all + estimators. Please refer to the :ref:`list of supported and unsupported + models ` for more information. It may change without + the usual deprecation cycle. By default this feature is not enabled. You can + enable it by setting the ``enable_metadata_routing`` flag to + ``True``:: + + >>> import sklearn + >>> sklearn.set_config(enable_metadata_routing=True) + + Note that the methods and requirements introduced in this document are only + relevant if you want to pass :term:`metadata` (e.g. ``sample_weight``) to a method. + If you're only passing ``X`` and ``y`` and no other parameter / metadata to + methods such as :term:`fit`, :term:`transform`, etc., then you don't need to set + anything. + +This guide demonstrates how :term:`metadata` can be routed and passed between objects in +scikit-learn. If you are developing a scikit-learn compatible estimator or +meta-estimator, you can check our related developer guide: +:ref:`sphx_glr_auto_examples_miscellaneous_plot_metadata_routing.py`. + +Metadata is data that an estimator, scorer, or CV splitter takes into account if the +user explicitly passes it as a parameter. For instance, :class:`~cluster.KMeans` accepts +`sample_weight` in its `fit()` method and considers it to calculate its centroids. +`classes` are consumed by some classifiers and `groups` are used in some splitters, but +any data that is passed into an object's methods apart from X and y can be considered as +metadata. Prior to scikit-learn version 1.3, there was no single API for passing +metadata like that if these objects were used in conjunction with other objects, e.g. a +scorer accepting `sample_weight` inside a :class:`~model_selection.GridSearchCV`. + +With the Metadata Routing API, we can transfer metadata to estimators, scorers, and CV +splitters using :term:`meta-estimators` (such as :class:`~pipeline.Pipeline` or +:class:`~model_selection.GridSearchCV`) or functions such as +:func:`~model_selection.cross_validate` which route data to other objects. In order to +pass metadata to a method like ``fit`` or ``score``, the object consuming the metadata, +must *request* it. This is done via `set_{method}_request()` methods, where `{method}` +is substituted by the name of the method that requests the metadata. For instance, +estimators that use the metadata in their `fit()` method would use `set_fit_request()`, +and scorers would use `set_score_request()`. These methods allow us to specify which +metadata to request, for instance `set_fit_request(sample_weight=True)`. 
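For instance, a minimal sketch of such request calls (using the same estimator
and scorer that appear in the fuller examples below)::

    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, make_scorer

    # Requires sklearn.set_config(enable_metadata_routing=True), as noted above.
    # The estimator requests sample_weight for fit(), the scorer for score().
    est = LogisticRegression().set_fit_request(sample_weight=True)
    weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True)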
+ +For grouped splitters such as :class:`~model_selection.GroupKFold`, a +``groups`` parameter is requested by default. This is best demonstrated by the +following examples. + +Usage Examples +************** +Here we present a few examples to show some common use-cases. Our goal is to pass +`sample_weight` and `groups` through :func:`~model_selection.cross_validate`, which +routes the metadata to :class:`~linear_model.LogisticRegressionCV` and to a custom scorer +made with :func:`~metrics.make_scorer`, both of which *can* use the metadata in their +methods. In these examples we want to individually set whether to use the metadata +within the different :term:`consumers `. + +The examples in this section require the following imports and data:: + + >>> import numpy as np + >>> from sklearn.metrics import make_scorer, accuracy_score + >>> from sklearn.linear_model import LogisticRegressionCV, LogisticRegression + >>> from sklearn.model_selection import cross_validate, GridSearchCV, GroupKFold + >>> from sklearn.feature_selection import SelectKBest + >>> from sklearn.pipeline import make_pipeline + >>> n_samples, n_features = 100, 4 + >>> rng = np.random.RandomState(42) + >>> X = rng.rand(n_samples, n_features) + >>> y = rng.randint(0, 2, size=n_samples) + >>> my_groups = rng.randint(0, 10, size=n_samples) + >>> my_weights = rng.rand(n_samples) + >>> my_other_weights = rng.rand(n_samples) + +Weighted scoring and fitting +---------------------------- + +The splitter used internally in :class:`~linear_model.LogisticRegressionCV`, +:class:`~model_selection.GroupKFold`, requests ``groups`` by default. However, we need +to explicitly request `sample_weight` for it and for our custom scorer by specifying +`sample_weight=True` in :class:`~linear_model.LogisticRegressionCV`s `set_fit_request()` +method and in :func:`~metrics.make_scorer`s `set_score_request()` method. Both +:term:`consumers ` know how to use ``sample_weight`` in their `fit()` or +`score()` methods. We can then pass the metadata in +:func:`~model_selection.cross_validate` which will route it to any active consumers:: + + >>> weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True) + >>> lr = LogisticRegressionCV( + ... cv=GroupKFold(), + ... scoring=weighted_acc + ... ).set_fit_request(sample_weight=True) + >>> cv_results = cross_validate( + ... lr, + ... X, + ... y, + ... params={"sample_weight": my_weights, "groups": my_groups}, + ... cv=GroupKFold(), + ... scoring=weighted_acc, + ... ) + +Note that in this example, :func:`~model_selection.cross_validate` routes ``my_weights`` +to both the scorer and :class:`~linear_model.LogisticRegressionCV`. + +If we would pass `sample_weight` in the params of +:func:`~model_selection.cross_validate`, but not set any object to request it, +`UnsetMetadataPassedError` would be raised, hinting to us that we need to explicitly set +where to route it. The same applies if ``params={"sample_weights": my_weights, ...}`` +were passed (note the typo, i.e. ``weights`` instead of ``weight``), since +``sample_weights`` was not requested by any of its underlying objects. + +Weighted scoring and unweighted fitting +--------------------------------------- + +When passing metadata such as ``sample_weight`` into a :term:`router` +(:term:`meta-estimators` or routing function), all ``sample_weight`` :term:`consumers +` require weights to be either explicitly requested or explicitly not +requested (i.e. ``True`` or ``False``). 
Thus, to perform an unweighted fit, we need to +configure :class:`~linear_model.LogisticRegressionCV` to not request sample weights, so +that :func:`~model_selection.cross_validate` does not pass the weights along:: + + >>> weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True) + >>> lr = LogisticRegressionCV( + ... cv=GroupKFold(), scoring=weighted_acc, + ... ).set_fit_request(sample_weight=False) + >>> cv_results = cross_validate( + ... lr, + ... X, + ... y, + ... cv=GroupKFold(), + ... params={"sample_weight": my_weights, "groups": my_groups}, + ... scoring=weighted_acc, + ... ) + +If :meth:`linear_model.LogisticRegressionCV.set_fit_request` had not been called, +:func:`~model_selection.cross_validate` would raise an error because ``sample_weight`` +is passed but :class:`~linear_model.LogisticRegressionCV` would not be explicitly +configured to recognize the weights. + +Unweighted feature selection +---------------------------- + +Routing metadata is only possible if the object's method knows how to use the metadata, +which in most cases means they have it as an explicit parameter. Only then we can set +request values for metadata using `set_fit_request(sample_weight=True)`, for instance. +This makes the object a :term:`consumer `. + +Unlike :class:`~linear_model.LogisticRegressionCV`, +:class:`~feature_selection.SelectKBest` can't consume weights and therefore no request +value for ``sample_weight`` on its instance is set and ``sample_weight`` is not routed +to it:: + + >>> weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True) + >>> lr = LogisticRegressionCV( + ... cv=GroupKFold(), scoring=weighted_acc, + ... ).set_fit_request(sample_weight=True) + >>> sel = SelectKBest(k=2) + >>> pipe = make_pipeline(sel, lr) + >>> cv_results = cross_validate( + ... pipe, + ... X, + ... y, + ... cv=GroupKFold(), + ... params={"sample_weight": my_weights, "groups": my_groups}, + ... scoring=weighted_acc, + ... ) + +Different scoring and fitting weights +------------------------------------- + +Despite :func:`~metrics.make_scorer` and +:class:`~linear_model.LogisticRegressionCV` both expecting the key +``sample_weight``, we can use aliases to pass different weights to different +consumers. In this example, we pass ``scoring_weight`` to the scorer, and +``fitting_weight`` to :class:`~linear_model.LogisticRegressionCV`:: + + >>> weighted_acc = make_scorer(accuracy_score).set_score_request( + ... sample_weight="scoring_weight" + ... ) + >>> lr = LogisticRegressionCV( + ... cv=GroupKFold(), scoring=weighted_acc, + ... ).set_fit_request(sample_weight="fitting_weight") + >>> cv_results = cross_validate( + ... lr, + ... X, + ... y, + ... cv=GroupKFold(), + ... params={ + ... "scoring_weight": my_weights, + ... "fitting_weight": my_other_weights, + ... "groups": my_groups, + ... }, + ... scoring=weighted_acc, + ... ) + +API Interface +************* + +A :term:`consumer` is an object (estimator, meta-estimator, scorer, splitter) which +accepts and uses some :term:`metadata` in at least one of its methods (for instance +``fit``, ``predict``, ``inverse_transform``, ``transform``, ``score``, ``split``). +Meta-estimators which only forward the metadata to other objects (child estimators, +scorers, or splitters) and don't use the metadata themselves are not consumers. +(Meta-)Estimators which route metadata to other objects are :term:`routers `. +A(n) (meta-)estimator can be a :term:`consumer` and a :term:`router` at the same time. 
+(Meta-)Estimators and splitters expose a `set_{method}_request` method for each method +which accepts at least one metadata. For instance, if an estimator supports +``sample_weight`` in ``fit`` and ``score``, it exposes +``estimator.set_fit_request(sample_weight=value)`` and +``estimator.set_score_request(sample_weight=value)``. Here ``value`` can be: + +- ``True``: method requests a ``sample_weight``. This means if the metadata is provided, + it will be used, otherwise no error is raised. +- ``False``: method does not request a ``sample_weight``. +- ``None``: router will raise an error if ``sample_weight`` is passed. This is in almost + all cases the default value when an object is instantiated and ensures the user sets + the metadata requests explicitly when a metadata is passed. The only exception are + ``Group*Fold`` splitters. +- ``"param_name"``: alias for ``sample_weight`` if we want to pass different weights to + different consumers. If aliasing is used the meta-estimator should not forward + ``"param_name"`` to the consumer, but ``sample_weight`` instead, because the consumer + will expect a param called ``sample_weight``. This means the mapping between the + metadata required by the object, e.g. ``sample_weight`` and the variable name provided + by the user, e.g. ``my_weights`` is done at the router level, and not by the consuming + object itself. + +Metadata are requested in the same way for scorers using ``set_score_request``. + +If a metadata, e.g. ``sample_weight``, is passed by the user, the metadata request for +all objects which potentially can consume ``sample_weight`` should be set by the user, +otherwise an error is raised by the router object. For example, the following code +raises an error, since it hasn't been explicitly specified whether ``sample_weight`` +should be passed to the estimator's scorer or not:: + + >>> param_grid = {"C": [0.1, 1]} + >>> lr = LogisticRegression().set_fit_request(sample_weight=True) + >>> try: + ... GridSearchCV( + ... estimator=lr, param_grid=param_grid + ... ).fit(X, y, sample_weight=my_weights) + ... except ValueError as e: + ... print(e) + [sample_weight] are passed but are not explicitly set as requested or not + requested for LogisticRegression.score, which is used within GridSearchCV.fit. + Call `LogisticRegression.set_score_request({metadata}=True/False)` for each metadata + you want to request/ignore. + +The issue can be fixed by explicitly setting the request value:: + + >>> lr = LogisticRegression().set_fit_request( + ... sample_weight=True + ... ).set_score_request(sample_weight=False) + +At the end of the **Usage Examples** section, we disable the configuration flag for +metadata routing:: + + >>> sklearn.set_config(enable_metadata_routing=False) + +.. _metadata_routing_models: + +Metadata Routing Support Status +******************************* +All consumers (i.e. simple estimators which only consume metadata and don't +route them) support metadata routing, meaning they can be used inside +meta-estimators which support metadata routing. However, development of support +for metadata routing for meta-estimators is in progress, and here is a list of +meta-estimators and tools which support and don't yet support metadata routing. 
+ + +Meta-estimators and functions supporting metadata routing: + +- :class:`sklearn.calibration.CalibratedClassifierCV` +- :class:`sklearn.compose.ColumnTransformer` +- :class:`sklearn.covariance.GraphicalLassoCV` +- :class:`sklearn.ensemble.VotingClassifier` +- :class:`sklearn.ensemble.VotingRegressor` +- :class:`sklearn.ensemble.BaggingClassifier` +- :class:`sklearn.ensemble.BaggingRegressor` +- :class:`sklearn.feature_selection.SelectFromModel` +- :class:`sklearn.impute.IterativeImputer` +- :class:`sklearn.linear_model.ElasticNetCV` +- :class:`sklearn.linear_model.LarsCV` +- :class:`sklearn.linear_model.LassoCV` +- :class:`sklearn.linear_model.LassoLarsCV` +- :class:`sklearn.linear_model.LogisticRegressionCV` +- :class:`sklearn.linear_model.MultiTaskElasticNetCV` +- :class:`sklearn.linear_model.MultiTaskLassoCV` +- :class:`sklearn.linear_model.RANSACRegressor` +- :class:`sklearn.linear_model.RidgeClassifierCV` +- :class:`sklearn.linear_model.RidgeCV` +- :class:`sklearn.model_selection.GridSearchCV` +- :class:`sklearn.model_selection.HalvingGridSearchCV` +- :class:`sklearn.model_selection.HalvingRandomSearchCV` +- :class:`sklearn.model_selection.RandomizedSearchCV` +- :func:`sklearn.model_selection.cross_validate` +- :func:`sklearn.model_selection.cross_val_score` +- :func:`sklearn.model_selection.cross_val_predict` +- :class:`sklearn.multiclass.OneVsOneClassifier` +- :class:`sklearn.multiclass.OneVsRestClassifier` +- :class:`sklearn.multiclass.OutputCodeClassifier` +- :class:`sklearn.multioutput.ClassifierChain` +- :class:`sklearn.multioutput.MultiOutputClassifier` +- :class:`sklearn.multioutput.MultiOutputRegressor` +- :class:`sklearn.linear_model.OrthogonalMatchingPursuitCV` +- :class:`sklearn.multioutput.RegressorChain` +- :class:`sklearn.pipeline.FeatureUnion` +- :class:`sklearn.pipeline.Pipeline` + +Meta-estimators and tools not supporting metadata routing yet: + +- :class:`sklearn.compose.TransformedTargetRegressor` +- :class:`sklearn.ensemble.AdaBoostClassifier` +- :class:`sklearn.ensemble.AdaBoostRegressor` +- :class:`sklearn.ensemble.StackingClassifier` +- :class:`sklearn.ensemble.StackingRegressor` +- :class:`sklearn.feature_selection.RFE` +- :class:`sklearn.feature_selection.RFECV` +- :class:`sklearn.feature_selection.SequentialFeatureSelector` +- :class:`sklearn.impute.IterativeImputer` +- :class:`sklearn.linear_model.RANSACRegressor` +- :class:`sklearn.model_selection.learning_curve` +- :class:`sklearn.model_selection.permutation_test_score` +- :class:`sklearn.model_selection.validation_curve` +- :class:`sklearn.semi_supervised.SelfTrainingClassifier` diff --git a/doc/model_persistence.rst b/doc/model_persistence.rst index 13183cd2efb31..0bc7384ec3d46 100644 --- a/doc/model_persistence.rst +++ b/doc/model_persistence.rst @@ -9,117 +9,365 @@ Model persistence ================= After training a scikit-learn model, it is desirable to have a way to persist -the model for future use without having to retrain. The following sections give -you some hints on how to persist a scikit-learn model. +the model for future use without having to retrain. Based on your use-case, +there are a few different ways to persist a scikit-learn model, and here we +help you decide which one suits you best. In order to make a decision, you need +to answer the following questions: -Python specific serialization ------------------------------ +1. Do you need the Python object after persistence, or do you only need to + persist in order to serve the model and get predictions out of it? 
-It is possible to save a model in scikit-learn by using Python's built-in
-persistence model, namely `pickle
-`_::
+If you only need to serve the model and no further investigation of the Python
+object itself is required, then :ref:`ONNX <onnx_persistence>` might be the
+best fit for you. Note that not all models are supported by ONNX.
 
-    >>> from sklearn import svm
+In case ONNX is not suitable for your use-case, the next question is:
+
+2. Do you absolutely trust the source of the model, or are there any security
+   concerns regarding where the persisted model comes from?
+
+If you have security concerns, then you should consider using
+:ref:`skops.io <skops_persistence>`, which gives you back the Python object but,
+unlike `pickle` based persistence solutions, does not automatically allow
+arbitrary code execution when loading the persisted model. Note that this
+requires manual investigation of the persisted file, which :mod:`skops.io`
+allows you to do.
+
+The other solutions assume you absolutely trust the source of the file to be
+loaded, as they are all susceptible to arbitrary code execution upon loading
+the persisted file since they all use the pickle protocol under the hood.
+
+3. Do you care about the performance of loading the model, and about sharing it
+   between processes, where a memory mapped object on disk can be beneficial?
+
+If yes, then you can consider using :ref:`joblib <pickle_persistence>`. If this
+is not a major concern for you, then you can use the built-in :mod:`pickle`
+module.
+
+4. Did you try :mod:`pickle` or :mod:`joblib` and found that the model cannot
+   be persisted? This can happen, for instance, when your model contains user
+   defined functions.
+
+If yes, then you can use `cloudpickle`_ which can serialize certain objects
+which cannot be serialized by :mod:`pickle` or :mod:`joblib`.
+
+
+Workflow Overview
+-----------------
+
+In a typical workflow, the first step is to train the model using scikit-learn
+and scikit-learn compatible libraries. Note that support for scikit-learn and
+third party estimators varies across the different persistence methods.
+
+Train and Persist the Model
+...........................
+
+Creating an appropriate model depends on your use-case. As an example, here we
+train a :class:`sklearn.ensemble.HistGradientBoostingClassifier` on the iris
+dataset::
+
+    >>> from sklearn import ensemble
     >>> from sklearn import datasets
-    >>> clf = svm.SVC()
-    >>> X, y= datasets.load_iris(return_X_y=True)
+    >>> clf = ensemble.HistGradientBoostingClassifier()
+    >>> X, y = datasets.load_iris(return_X_y=True)
     >>> clf.fit(X, y)
-    SVC()
+    HistGradientBoostingClassifier()
+
+Once the model is trained, you can persist it using your desired method, and
+then you can load the model in a separate environment and get predictions from
+it given input data. Here there are two major paths depending on how you
+persist and plan to serve the model:
+
+- :ref:`ONNX <onnx_persistence>`: You need an `ONNX` runtime and an environment
+  with appropriate dependencies installed to load the model and use the runtime
+  to get predictions. This environment can be minimal and does not necessarily
+  even require Python to be installed to load the model and compute
+  predictions. Also note that `onnxruntime` typically requires much less RAM
+  than Python to compute predictions from small models.
+
+- :mod:`skops.io`, :mod:`pickle`, :mod:`joblib`, `cloudpickle`_: You need a
+  Python environment with the appropriate dependencies installed to load the
+  model and get predictions from it. This environment should have the same
+  **packages** and the same **versions** as the environment where the model was
+  trained. Note that none of these methods support loading a model trained with
+  a different version of scikit-learn, and possibly different versions of other
+  dependencies such as `numpy` and `scipy`. Another concern would be running
+  the persisted model on different hardware; in most cases you should be able
+  to load your persisted model on different hardware.
+
+
+.. _onnx_persistence:
+
+ONNX
+----
+
+`ONNX`, or `Open Neural Network Exchange <https://onnx.ai>`__ format, is best
+suited to use-cases where one needs to persist the model and then use the
+persisted artifact to get predictions without the need to load the Python
+object itself. It is also useful in cases where the serving environment needs
+to be lean and minimal, since the `ONNX` runtime does not require `Python`.
+
+`ONNX` is a binary serialization of the model. It has been developed to improve
+the usability of the interoperable representation of data models. It aims to
+facilitate the conversion of data models between different machine learning
+frameworks, and to improve their portability on different computing
+architectures. More details are available from the `ONNX tutorial
+`__. To convert a scikit-learn model to `ONNX`,
+`sklearn-onnx `__ has been developed. However, not all scikit-learn models
+are supported, and it is limited to core scikit-learn estimators and does not
+support most third party estimators. One can write a custom converter for third
+party or custom estimators, but the documentation for doing so is sparse and it
+might be challenging.
+
+|details-start|
+**Using ONNX**
+|details-split|
+
+To convert the model to `ONNX` format, you need to give the converter some
+information about the input as well, about which you can read more `here
+`__::
+
+    import numpy
+
+    from skl2onnx import to_onnx
+
+    # The converter needs an example of the input to infer its type and shape.
+    onx = to_onnx(clf, X[:1].astype(numpy.float32), target_opset=12)
+    with open("filename.onnx", "wb") as f:
+        f.write(onx.SerializeToString())
+
+You can load the model in Python and use the `ONNX` runtime to get
+predictions::
+
+    import numpy
+
+    from onnxruntime import InferenceSession
+
+    with open("filename.onnx", "rb") as f:
+        onx = f.read()
+    sess = InferenceSession(onx, providers=["CPUExecutionProvider"])
+    # X_test holds the samples for which predictions are requested.
+    pred_ort = sess.run(None, {"X": X_test.astype(numpy.float32)})[0]
+
+
+|details-end|
+
+.. _skops_persistence:
+
+`skops.io`
+----------
+
+:mod:`skops.io` avoids using :mod:`pickle` and only loads files which have types
+and references to functions which are trusted either by default or by the user.
+Therefore it provides a more secure format than :mod:`pickle`, :mod:`joblib`,
+and `cloudpickle`_.
+ - >>> import pickle - >>> s = pickle.dumps(clf) - >>> clf2 = pickle.loads(s) - >>> clf2.predict(X[0:1]) - array([0]) - >>> y[0] - 0 +|details-start| +**Using skops** +|details-split| -In the specific case of scikit-learn, it may be better to use joblib's -replacement of pickle (``dump`` & ``load``), which is more efficient on -objects that carry large numpy arrays internally as is often the case for -fitted scikit-learn estimators, but can only pickle to the disk and not to a -string:: +The API is very similar to :mod:`pickle`, and you can persist your models as +explained in the `documentation +`__ using +:func:`skops.io.dump` and :func:`skops.io.dumps`:: - >>> from joblib import dump, load - >>> dump(clf, 'filename.joblib') # doctest: +SKIP + import skops.io as sio + obj = sio.dump(clf, "filename.skops") -Later you can load back the pickled model (possibly in another Python process) -with:: +And you can load them back using :func:`skops.io.load` and +:func:`skops.io.loads`. However, you need to specify the types which are +trusted by you. You can get existing unknown types in a dumped object / file +using :func:`skops.io.get_untrusted_types`, and after checking its contents, +pass it to the load function:: - >>> clf = load('filename.joblib') # doctest:+SKIP + unknown_types = sio.get_untrusted_types(file="filename.skops") + # investigate the contents of unknown_types, and only load if you trust + # everything you see. + clf = sio.load("filename.skops", trusted=unknown_types) -.. note:: +Please report issues and feature requests related to this format on the `skops +issue tracker `__. - ``dump`` and ``load`` functions also accept file-like object - instead of filenames. More information on data persistence with Joblib is - available `here - `_. +|details-end| + +.. _pickle_persistence: + +`pickle`, `joblib`, and `cloudpickle` +------------------------------------- + +These three modules / packages, use the `pickle` protocol under the hood, but +come with slight variations: + +- :mod:`pickle` is a module from the Python Standard Library. It can serialize + and deserialize any Python object, including custom Python classes and + objects. +- :mod:`joblib` is more efficient than `pickle` when working with large machine + learning models or large numpy arrays. +- `cloudpickle`_ can serialize certain objects which cannot be serialized by + :mod:`pickle` or :mod:`joblib`, such as user defined functions and lambda + functions. This can happen for instance, when using a + :class:`~sklearn.preprocessing.FunctionTransformer` and using a custom + function to transform the data. + +|details-start| +**Using** ``pickle``, ``joblib``, **or** ``cloudpickle`` +|details-split| + +Depending on your use-case, you can choose one of these three methods to +persist and load your scikit-learn model, and they all follow the same API:: + + # Here you can replace pickle with joblib or cloudpickle + from pickle import dump + with open("filename.pkl", "wb") as f: + dump(clf, f, protocol=5) + +Using `protocol=5` is recommended to reduce memory usage and make it faster to +store and load any large NumPy array stored as a fitted attribute in the model. +You can alternatively pass `protocol=pickle.HIGHEST_PROTOCOL` which is +equivalent to `protocol=5` in Python 3.8 and later (at the time of writing). 
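+
+If you choose :mod:`joblib` instead, it can additionally memory map the stored
+arrays when loading, which makes sharing the same model between several Python
+processes cheaper. Below is a minimal sketch (the fitted estimator `clf` is the
+one trained above; the file name is purely illustrative)::
+
+    from joblib import dump, load
+
+    # joblib works on file names directly.
+    dump(clf, "filename.joblib")
+    # mmap_mode="r" memory-maps the stored arrays instead of copying them into
+    # RAM, which can speed up loading the same model in multiple processes.
+    clf_mmap = load("filename.joblib", mmap_mode="r")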
+
+And later when needed, you can load the same object from the persisted file::
+
+    # Here you can replace pickle with joblib or cloudpickle
+    from pickle import load
+    with open("filename.pkl", "rb") as f:
+        clf = load(f)
+
+|details-end|
 
 .. _persistence_limitations:
 
-Security & maintainability limitations
-......................................
+Security & Maintainability Limitations
+--------------------------------------
+
+:mod:`pickle` (and :mod:`joblib` and :mod:`cloudpickle` by extension) has many
+documented security vulnerabilities by design and should only be used if the
+artifact, i.e. the pickle-file, comes from a trusted and verified source. You
+should never load a pickle file from an untrusted source, just as you should
+never execute code from an untrusted source.
 
-pickle (and joblib by extension), has some issues regarding maintainability
-and security. Because of this,
+Also note that arbitrary computations can be represented using the `ONNX`
+format, and it is therefore recommended to serve models using `ONNX` in a
+sandboxed environment to safeguard against computational and memory exploits.
 
-* Never unpickle untrusted data as it could lead to malicious code being
-  executed upon loading.
-* While models saved using one version of scikit-learn might load in
-  other versions, this is entirely unsupported and inadvisable. It should
-  also be kept in mind that operations performed on such data could give
-  different and unexpected results.
+Also note that there are no supported ways to load a model trained with a
+different version of scikit-learn. While using :mod:`skops.io`, :mod:`joblib`,
+:mod:`pickle`, or `cloudpickle`_, models saved using one version of
+scikit-learn might load in other versions; however, this is entirely
+unsupported and inadvisable. It should also be kept in mind that operations
+performed on such data could give different and unexpected results, or even
+crash your Python process.
 
 In order to rebuild a similar model with future versions of scikit-learn,
 additional metadata should be saved along the pickled model:
 
 * The training data, e.g. a reference to an immutable snapshot
-* The python source code used to generate the model
+* The Python source code used to generate the model
 * The versions of scikit-learn and its dependencies
 * The cross validation score obtained on the training data
 
 This should make it possible to check that the cross-validation score is in
 the same range as before.
 
-Aside for a few exceptions, pickled models should be portable across
-architectures assuming the same versions of dependencies and Python are used.
-If you encounter an estimator that is not portable please open an issue on
-GitHub. Pickled models are often deployed in production using containers, like
-Docker, in order to freeze the environment and dependencies.
-
-If you want to know more about these issues and explore other possible
-serialization methods, please refer to this
-`talk by Alex Gaynor
-`_.
-
-Interoperable formats
----------------------
-
-For reproducibility and quality control needs, when different architectures
-and environments should be taken into account, exporting the model in
-`Open Neural Network
-Exchange `_ format or `Predictive Model Markup Language
-(PMML) `_ format
-might be a better approach than using `pickle` alone.
-These are helpful where you may want to use your model for prediction in a
-different environment from where the model was trained.
-
-ONNX is a binary serialization of the model. It has been developed to improve
-the usability of the interoperable representation of data models.
-It aims to facilitate the conversion of the data
-models between different machine learning frameworks, and to improve their
-portability on different computing architectures. More details are available
-from the `ONNX tutorial `_.
-To convert scikit-learn model to ONNX a specific tool `sklearn-onnx
-`_ has been developed.
-
-PMML is an implementation of the `XML
-`_ document standard
-defined to represent data models together with the data used to generate them.
-Being human and machine readable,
-PMML is a good option for model validation on different platforms and
-long term archiving. On the other hand, as XML in general, its verbosity does
-not help in production when performance is critical.
-To convert scikit-learn model to PMML you can use for example `sklearn2pmml
-`_ distributed under the Affero GPLv3
-license.
+Aside from a few exceptions, persisted models should be portable across
+operating systems and hardware architectures assuming the same versions of
+dependencies and Python are used. If you encounter an estimator that is not
+portable, please open an issue on GitHub. Persisted models are often deployed
+in production using containers like Docker, in order to freeze the environment
+and dependencies.
+
+If you want to know more about these issues, please refer to these talks:
+
+- `Adrin Jalali: Let's exploit pickle, and skops to the rescue! | PyData
+  Amsterdam 2023 `__.
+- `Alex Gaynor: Pickles are for Delis, not Software - PyCon 2014
+  `__.
+
+
+.. _serving_environment:
+
+Replicating the training environment in production
+..................................................
+
+If the versions of the dependencies differ between the training and production
+environments, unexpected behaviour and errors may occur when using the trained
+model. To prevent such situations it is recommended to use the same
+dependencies and versions in both the training and production environments.
+These dependencies, including transitive ones, can be pinned with the help of
+package management tools like `pip`, `mamba`, `conda`, `poetry`, `conda-lock`,
+`pixi`, etc.
+
+It is not always possible to load a model trained with older versions of the
+scikit-learn library and its dependencies in an updated software environment.
+Instead, you might need to retrain the model with the new versions of all the
+libraries. So when training a model, it is important to record the training
+recipe (e.g. a Python script) and training set information, and metadata about
+all the dependencies to be able to automatically reconstruct the same training
+environment for the updated software.
+
+|details-start|
+**InconsistentVersionWarning**
+|details-split|
+
+When an estimator is loaded with a scikit-learn version that is inconsistent
+with the version the estimator was pickled with, a
+:class:`~sklearn.exceptions.InconsistentVersionWarning` is raised. This warning
+can be caught to obtain the original version the estimator was pickled with::
+
+    import pickle
+    import warnings
+
+    from sklearn.exceptions import InconsistentVersionWarning
+
+    warnings.simplefilter("error", InconsistentVersionWarning)
+
+    try:
+        with open("model_from_previous_version.pickle", "rb") as f:
+            est = pickle.load(f)
+    except InconsistentVersionWarning as w:
+        print(w.original_sklearn_version)
+
+|details-end|
+
+
+Serving the model artifact
+..........................
+
+The last step after training a scikit-learn model is serving the model.
+Once the trained model is successfully loaded, it can be served to manage
+different prediction requests. This can involve deploying the model as a
+web service using containerization, or other model deployment strategies,
+according to the specifications.
+
+
+Summarizing the key points
+--------------------------
+
+Based on the different approaches for model persistence, the key points for
+each approach can be summarized as follows:
+
+* `ONNX`: It provides a uniform format for persisting any machine learning or
+  deep learning model (not only scikit-learn models) and is useful for model
+  inference (predictions). It can, however, result in compatibility issues with
+  different frameworks.
+* :mod:`skops.io`: Trained scikit-learn models can be easily shared and put
+  into production using :mod:`skops.io`. It is more secure compared to
+  alternate approaches based on :mod:`pickle` because it does not load
+  arbitrary code unless explicitly asked for by the user. Such code needs to be
+  packaged and importable in the target Python environment.
+* :mod:`joblib`: Efficient memory mapping techniques make it faster to use the
+  same persisted model in multiple Python processes when loading with
+  `mmap_mode="r"`. It also gives easy shortcuts to compress and decompress the
+  persisted object without the need for extra code. However, it may trigger the
+  execution of malicious code when loading a model from an untrusted source, as
+  with any other pickle-based persistence mechanism.
+* :mod:`pickle`: It is native to Python and most Python objects can be
+  serialized and deserialized using :mod:`pickle`, including custom Python
+  classes and functions, as long as they are defined in a package that can be
+  imported in the target environment. While :mod:`pickle` can be used to easily
+  save and load scikit-learn models, it may trigger the execution of malicious
+  code while loading a model from an untrusted source. :mod:`pickle` can also
+  be very efficient memory-wise if the model was persisted with `protocol=5`,
+  but it does not support memory mapping.
+* `cloudpickle`_: It has loading efficiency comparable to :mod:`pickle` and
+  :mod:`joblib` (without memory mapping), but offers additional flexibility to
+  serialize custom Python code such as lambda expressions and interactively
+  defined functions and classes. It might be a last resort to persist pipelines
+  with custom Python components such as a
+  :class:`sklearn.preprocessing.FunctionTransformer` that wraps a function
+  defined in the training script itself or more generally outside of any
+  importable Python package. Note that `cloudpickle`_ offers no forward
+  compatibility guarantees and you might need the same version of
+  `cloudpickle`_ to load the persisted model, along with the same version of
+  all the libraries used to define the model. As with the other pickle-based
+  persistence mechanisms, it may trigger the execution of malicious code while
+  loading a model from an untrusted source.
+
+..
_cloudpickle: https://github.com/cloudpipe/cloudpickle diff --git a/doc/model_selection.rst b/doc/model_selection.rst index 25cd2b655ccc5..522544aefc820 100644 --- a/doc/model_selection.rst +++ b/doc/model_selection.rst @@ -14,5 +14,6 @@ Model selection and evaluation modules/cross_validation modules/grid_search + modules/classification_threshold modules/model_evaluation modules/learning_curve diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index 0d89ec2ef5879..7a21274a7250f 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -12,6 +12,8 @@ Array API support (experimental) The `Array API `_ specification defines a standard API for all array manipulation libraries with a NumPy-like API. +Scikit-learn's Array API support requires +`array-api-compat `__ to be installed. Some scikit-learn estimators that primarily rely on NumPy (as opposed to using Cython) to implement the algorithmic logic of their `fit`, `predict` or @@ -23,8 +25,8 @@ At this stage, this support is **considered experimental** and must be enabled explicitly as explained in the following. .. note:: - Currently, only `cupy.array_api` and `numpy.array_api` are known to work - with scikit-learn's estimators. + Currently, only `cupy.array_api`, `array-api-strict`, `cupy`, and `PyTorch` + are known to work with scikit-learn's estimators. Example usage ============= @@ -36,11 +38,11 @@ Here is an example code snippet to demonstrate how to use `CuPy >>> from sklearn.datasets import make_classification >>> from sklearn import config_context >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis - >>> import cupy.array_api as xp + >>> import cupy >>> X_np, y_np = make_classification(random_state=0) - >>> X_cu = xp.asarray(X_np) - >>> y_cu = xp.asarray(y_np) + >>> X_cu = cupy.asarray(X_np) + >>> y_cu = cupy.asarray(y_np) >>> X_cu.device @@ -57,19 +59,117 @@ GPU. We provide a experimental `_estimator_with_converted_arrays` utility that transfers an estimator attributes from Array API to a ndarray:: >>> from sklearn.utils._array_api import _estimator_with_converted_arrays - >>> cupy_to_ndarray = lambda array : array._array.get() + >>> cupy_to_ndarray = lambda array : array.get() >>> lda_np = _estimator_with_converted_arrays(lda, cupy_to_ndarray) >>> X_trans = lda_np.transform(X_np) >>> type(X_trans) -.. _array_api_estimators: +PyTorch Support +--------------- -Estimators with support for `Array API`-compatible inputs -========================================================= +PyTorch Tensors are supported by setting `array_api_dispatch=True` and passing in +the tensors directly:: + >>> import torch + >>> X_torch = torch.asarray(X_np, device="cuda", dtype=torch.float32) + >>> y_torch = torch.asarray(y_np, device="cuda", dtype=torch.float32) + + >>> with config_context(array_api_dispatch=True): + ... lda = LinearDiscriminantAnalysis() + ... X_trans = lda.fit_transform(X_torch, y_torch) + >>> type(X_trans) + + >>> X_trans.device.type + 'cuda' + +.. _array_api_supported: + +Support for `Array API`-compatible inputs +========================================= + +Estimators and other tools in scikit-learn that support Array API compatible inputs. 
+ +Estimators +---------- + +- :class:`decomposition.PCA` (with `svd_solver="full"`, + `svd_solver="randomized"` and `power_iteration_normalizer="QR"`) +- :class:`linear_model.Ridge` (with `solver="svd"`) - :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`) +- :class:`preprocessing.KernelCenterer` +- :class:`preprocessing.MaxAbsScaler` +- :class:`preprocessing.MinMaxScaler` +- :class:`preprocessing.Normalizer` + +Metrics +------- + +- :func:`sklearn.metrics.accuracy_score` +- :func:`sklearn.metrics.r2_score` +- :func:`sklearn.metrics.zero_one_loss` + +Tools +----- -Coverage for more estimators is expected to grow over time. Please follow the -dedicated `meta-issue on GitHub +- :func:`model_selection.train_test_split` + +Coverage is expected to grow over time. Please follow the dedicated `meta-issue on GitHub `_ to track progress. + +Type of return values and fitted attributes +------------------------------------------- + +When calling functions or methods with Array API compatible inputs, the +convention is to return array values of the same array container type and +device as the input data. + +Similarly, when an estimator is fitted with Array API compatible inputs, the +fitted attributes will be arrays from the same library as the input and stored +on the same device. The `predict` and `transform` method subsequently expect +inputs from the same array library and device as the data passed to the `fit` +method. + +Note however that scoring functions that return scalar values return Python +scalars (typically a `float` instance) instead of an array scalar value. + +Common estimator checks +======================= + +Add the `array_api_support` tag to an estimator's set of tags to indicate that +it supports the Array API. This will enable dedicated checks as part of the +common tests to verify that the estimators result's are the same when using +vanilla NumPy and Array API inputs. + +To run these checks you need to install +`array_api_compat `_ in your +test environment. To run the full set of checks you need to install both +`PyTorch `_ and `CuPy `_ and have +a GPU. Checks that can not be executed or have missing dependencies will be +automatically skipped. Therefore it's important to run the tests with the +`-v` flag to see which checks are skipped: + +.. prompt:: bash $ + + pip install array-api-compat # and other libraries as needed + pytest -k "array_api" -v + +Note on MPS device support +-------------------------- + +On macOS, PyTorch can use the Metal Performance Shaders (MPS) to access +hardware accelerators (e.g. the internal GPU component of the M1 or M2 chips). +However, the MPS device support for PyTorch is incomplete at the time of +writing. See the following github issue for more details: + +- https://github.com/pytorch/pytorch/issues/77764 + +To enable the MPS support in PyTorch, set the environment variable +`PYTORCH_ENABLE_MPS_FALLBACK=1` before running the tests: + +.. prompt:: bash $ + + PYTORCH_ENABLE_MPS_FALLBACK=1 pytest -k "array_api" -v + +At the time of writing all scikit-learn tests should pass, however, the +computational speed is not necessarily better than with the CPU device. diff --git a/doc/modules/biclustering.rst b/doc/modules/biclustering.rst index 44a996ed0ffd6..2189e85e0f0ef 100644 --- a/doc/modules/biclustering.rst +++ b/doc/modules/biclustering.rst @@ -4,8 +4,7 @@ Biclustering ============ -Biclustering can be performed with the module -:mod:`sklearn.cluster.bicluster`. 
Biclustering algorithms simultaneously +Biclustering algorithms simultaneously cluster rows and columns of a data matrix. These clusters of rows and columns are known as biclusters. Each determines a submatrix of the original data matrix with some desired properties. @@ -82,7 +81,7 @@ diagonal and checkerboard bicluster structures. these alternate names. -.. currentmodule:: sklearn.cluster.bicluster +.. currentmodule:: sklearn.cluster .. _spectral_coclustering: diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index 1fcd1d501d100..c0a6edb837b2f 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -20,26 +20,44 @@ prediction. Well calibrated classifiers are probabilistic classifiers for which the output of the :term:`predict_proba` method can be directly interpreted as a confidence level. -For instance, a well calibrated (binary) classifier should classify the samples -such that among the samples to which it gave a :term:`predict_proba` value -close to 0.8, -approximately 80% actually belong to the positive class. +For instance, a well calibrated (binary) classifier should classify the samples such +that among the samples to which it gave a :term:`predict_proba` value close to, say, +0.8, approximately 80% actually belong to the positive class. + +Before we show how to re-calibrate a classifier, we first need a way to detect how +good a classifier is calibrated. + +.. note:: + Strictly proper scoring rules for probabilistic predictions like + :func:`sklearn.metrics.brier_score_loss` and + :func:`sklearn.metrics.log_loss` assess calibration (reliability) and + discriminative power (resolution) of a model, as well as the randomness of the data + (uncertainty) at the same time. This follows from the well-known Brier score + decomposition of Murphy [1]_. As it is not clear which term dominates, the score is + of limited use for assessing calibration alone (unless one computes each term of + the decomposition). A lower Brier loss, for instance, does not necessarily + mean a better calibrated model, it could also mean a worse calibrated model with much + more discriminatory power, e.g. using many more features. .. _calibration_curve: Calibration curves ------------------ -Calibration curves (also known as reliability diagrams) compare how well the -probabilistic predictions of a binary classifier are calibrated. It plots -the true frequency of the positive label against its predicted probability, -for binned predictions. -The x axis represents the average predicted probability in each bin. The -y axis is the *fraction of positives*, i.e. the proportion of samples whose -class is the positive class (in each bin). The top calibration curve plot -is created with :func:`CalibrationDisplay.from_estimators`, which uses -:func:`calibration_curve` to calculate the per bin average predicted -probabilities and fraction of positives. +Calibration curves, also referred to as *reliability diagrams* (Wilks 1995 [2]_), +compare how well the probabilistic predictions of a binary classifier are calibrated. +It plots the frequency of the positive label (to be more precise, an estimation of the +*conditional event probability* :math:`P(Y=1|\text{predict_proba})`) on the y-axis +against the predicted probability :term:`predict_proba` of a model on the x-axis. +The tricky part is to get values for the y-axis. +In scikit-learn, this is accomplished by binning the predictions such that the x-axis +represents the average predicted probability in each bin. 
+The y-axis is then the *fraction of positives* given the predictions of that bin, i.e. +the proportion of samples whose class is the positive class (in each bin). + +The top calibration curve plot is created with +:func:`CalibrationDisplay.from_estimator`, which uses :func:`calibration_curve` to +calculate the per bin average predicted probabilities and fraction of positives. :func:`CalibrationDisplay.from_estimator` takes as input a fitted classifier, which is used to calculate the predicted probabilities. The classifier thus must have :term:`predict_proba` method. For @@ -56,13 +74,20 @@ by showing the number of samples in each predicted probability bin. .. currentmodule:: sklearn.linear_model -:class:`LogisticRegression` returns well calibrated predictions by default as it directly -optimizes :ref:`log_loss`. In contrast, the other methods return biased probabilities; -with different biases per method: +:class:`LogisticRegression` is more likely to return well calibrated predictions by itself as it has a +canonical link function for its loss, i.e. the logit-link for the :ref:`log_loss`. +In the unpenalized case, this leads to the so-called **balance property**, see [8]_ and :ref:`Logistic_regression`. +In the plot above, data is generated according to a linear mechanism, which is +consistent with the :class:`LogisticRegression` model (the model is 'well specified'), +and the value of the regularization parameter `C` is tuned to be +appropriate (neither too strong nor too low). As a consequence, this model returns +accurate predictions from its `predict_proba` method. +In contrast to that, the other shown models return biased probabilities; with +different biases per model. .. currentmodule:: sklearn.naive_bayes -:class:`GaussianNB` tends to push probabilities to 0 or 1 (note the counts +:class:`GaussianNB` (Naive Bayes) tends to push probabilities to 0 or 1 (note the counts in the histograms). This is mainly because it makes the assumption that features are conditionally independent given the class, which is not the case in this dataset which contains 2 redundant features. @@ -70,9 +95,9 @@ case in this dataset which contains 2 redundant features. .. currentmodule:: sklearn.ensemble :class:`RandomForestClassifier` shows the opposite behavior: the histograms -show peaks at approximately 0.2 and 0.9 probability, while probabilities +show peaks at probabilities approximately 0.2 and 0.9, while probabilities close to 0 or 1 are very rare. An explanation for this is given by -Niculescu-Mizil and Caruana [1]_: "Methods such as bagging and random +Niculescu-Mizil and Caruana [3]_: "Methods such as bagging and random forests that average predictions from a base set of models can have difficulty making predictions near 0 and 1 because variance in the underlying base models will bias predictions that should be near zero or one @@ -85,18 +110,16 @@ predict values larger than 0 for this case, thus moving the average prediction of the bagged ensemble away from 0. We observe this effect most strongly with random forests because the base-level trees trained with random forests have relatively high variance due to feature subsetting." 
As -a result, the calibration curve also referred to as the reliability diagram -(Wilks 1995 [2]_) shows a characteristic sigmoid shape, indicating that the -classifier could trust its "intuition" more and return probabilities closer +a result, the calibration curve shows a characteristic sigmoid shape, indicating that +the classifier could trust its "intuition" more and return probabilities closer to 0 or 1 typically. .. currentmodule:: sklearn.svm -Linear Support Vector Classification (:class:`LinearSVC`) shows an even more -sigmoid curve than :class:`~sklearn.ensemble.RandomForestClassifier`, which is -typical for maximum-margin methods (compare Niculescu-Mizil and Caruana [1]_), -which focus on difficult to classify samples that are close to the decision -boundary (the support vectors). +:class:`LinearSVC` (SVC) shows an even more sigmoid curve than the random forest, which +is typical for maximum-margin methods (compare Niculescu-Mizil and Caruana [3]_), which +focus on difficult to classify samples that are close to the decision boundary (the +support vectors). Calibrating a classifier ------------------------ @@ -107,10 +130,11 @@ Calibrating a classifier consists of fitting a regressor (called a *calibrator*) that maps the output of the classifier (as given by :term:`decision_function` or :term:`predict_proba`) to a calibrated probability in [0, 1]. Denoting the output of the classifier for a given sample by :math:`f_i`, -the calibrator tries to predict :math:`p(y_i = 1 | f_i)`. +the calibrator tries to predict the conditional event probability +:math:`P(y_i = 1 | f_i)`. -The samples that are used to fit the calibrator should not be the same -samples used to fit the classifier, as this would introduce bias. +Ideally, the calibrator is fit on a dataset independent of the training data used to +fit the classifier in the first place. This is because performance of the classifier on its training data would be better than for novel data. Using the classifier output of training data to fit the calibrator would thus result in a biased calibrator that maps to @@ -161,29 +185,18 @@ fit the regressor. It is up to the user to make sure that the data used for fitting the classifier is disjoint from the data used for fitting the regressor. -:func:`sklearn.metrics.brier_score_loss` may be used to assess how -well a classifier is calibrated. However, this metric should be used with care -because a lower Brier score does not always mean a better calibrated model. -This is because the Brier score metric is a combination of calibration loss -and refinement loss. Calibration loss is defined as the mean squared deviation -from empirical probabilities derived from the slope of ROC segments. -Refinement loss can be defined as the expected optimal loss as measured by the -area under the optimal cost curve. As refinement loss can change -independently from calibration loss, a lower Brier score does not necessarily -mean a better calibrated model. - -:class:`CalibratedClassifierCV` supports the use of two 'calibration' -regressors: 'sigmoid' and 'isotonic'. +:class:`CalibratedClassifierCV` supports the use of two regression techniques +for calibration via the `method` parameter: `"sigmoid"` and `"isotonic"`. .. _sigmoid_regressor: Sigmoid ^^^^^^^ -The sigmoid regressor is based on Platt's logistic model [3]_: +The sigmoid regressor, `method="sigmoid"` is based on Platt's logistic model [4]_: .. 
math:: - p(y_i = 1 | f_i) = \frac{1}{1 + \exp(A f_i + B)} + p(y_i = 1 | f_i) = \frac{1}{1 + \exp(A f_i + B)} \,, where :math:`y_i` is the true label of sample :math:`i` and :math:`f_i` is the output of the un-calibrated classifier for sample :math:`i`. :math:`A` @@ -194,37 +207,46 @@ The sigmoid method assumes the :ref:`calibration curve ` can be corrected by applying a sigmoid function to the raw predictions. This assumption has been empirically justified in the case of :ref:`svm` with common kernel functions on various benchmark datasets in section 2.1 of Platt -1999 [3]_ but does not necessarily hold in general. Additionally, the +1999 [4]_ but does not necessarily hold in general. Additionally, the logistic model works best if the calibration error is symmetrical, meaning the classifier output for each binary class is normally distributed with -the same variance [6]_. This can be a problem for highly imbalanced +the same variance [7]_. This can be a problem for highly imbalanced classification problems, where outputs do not have equal variance. -In general this method is most effective when the un-calibrated model is -under-confident and has similar calibration errors for both high and low -outputs. +In general this method is most effective for small sample sizes or when the +un-calibrated model is under-confident and has similar calibration errors for both +high and low outputs. Isotonic ^^^^^^^^ -The 'isotonic' method fits a non-parametric isotonic regressor, which outputs -a step-wise non-decreasing function (see :mod:`sklearn.isotonic`). It -minimizes: +The `method="isotonic"` fits a non-parametric isotonic regressor, which outputs +a step-wise non-decreasing function, see :mod:`sklearn.isotonic`. It minimizes: .. math:: \sum_{i=1}^{n} (y_i - \hat{f}_i)^2 -subject to :math:`\hat{f}_i >= \hat{f}_j` whenever -:math:`f_i >= f_j`. :math:`y_i` is the true +subject to :math:`\hat{f}_i \geq \hat{f}_j` whenever +:math:`f_i \geq f_j`. :math:`y_i` is the true label of sample :math:`i` and :math:`\hat{f}_i` is the output of the calibrated classifier for sample :math:`i` (i.e., the calibrated probability). This method is more general when compared to 'sigmoid' as the only restriction is that the mapping function is monotonically increasing. It is thus more powerful as it can correct any monotonic distortion of the un-calibrated model. -However, it is more prone to overfitting, especially on small datasets [5]_. +However, it is more prone to overfitting, especially on small datasets [6]_. Overall, 'isotonic' will perform as well as or better than 'sigmoid' when -there is enough data (greater than ~ 1000 samples) to avoid overfitting [1]_. +there is enough data (greater than ~ 1000 samples) to avoid overfitting [3]_. + +.. note:: Impact on ranking metrics like AUC + + It is generally expected that calibration does not affect ranking metrics such as + ROC-AUC. However, these metrics might differ after calibration when using + `method="isotonic"` since isotonic regression introduces ties in the predicted + probabilities. This can be seen as within the uncertainty of the model predictions. + In case, you strictly want to keep the ranking and thus AUC scores, use + `method="sigmoid"` which is a strictly monotonic transformation and thus keeps + the ranking. Multiclass support ^^^^^^^^^^^^^^^^^^ @@ -234,7 +256,7 @@ support 1-dimensional data (e.g., binary classification output) but are extended for multiclass classification if the `base_estimator` supports multiclass predictions. 
For multiclass predictions, :class:`CalibratedClassifierCV` calibrates for -each class separately in a :ref:`ovr_classification` fashion [4]_. When +each class separately in a :ref:`ovr_classification` fashion [5]_. When predicting probabilities, the calibrated probabilities for each class are predicted separately. As those probabilities do not necessarily sum to @@ -249,31 +271,42 @@ one, a postprocessing is performed to normalize them. .. topic:: References: - .. [1] `Predicting Good Probabilities with Supervised Learning - `_, - A. Niculescu-Mizil & R. Caruana, ICML 2005 + .. [1] Allan H. Murphy (1973). + :doi:`"A New Vector Partition of the Probability Score" + <10.1175/1520-0450(1973)012%3C0595:ANVPOT%3E2.0.CO;2>` + Journal of Applied Meteorology and Climatology .. [2] `On the combination of forecast probabilities for consecutive precipitation periods. `_ Wea. Forecasting, 5, 640–650., Wilks, D. S., 1990a - .. [3] `Probabilistic Outputs for Support Vector Machines and Comparisons + .. [3] `Predicting Good Probabilities with Supervised Learning + `_, + A. Niculescu-Mizil & R. Caruana, ICML 2005 + + + .. [4] `Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods. `_ J. Platt, (1999) - .. [4] `Transforming Classifier Scores into Accurate Multiclass + .. [5] `Transforming Classifier Scores into Accurate Multiclass Probability Estimates. `_ B. Zadrozny & C. Elkan, (KDD 2002) - .. [5] `Predicting accurate probabilities with a ranking loss. + .. [6] `Predicting accurate probabilities with a ranking loss. `_ Menon AK, Jiang XJ, Vembu S, Elkan C, Ohno-Machado L. Proc Int Conf Mach Learn. 2012;2012:703-710 - .. [6] `Beyond sigmoids: How to obtain well-calibrated probabilities from + .. [7] `Beyond sigmoids: How to obtain well-calibrated probabilities from binary classifiers with beta calibration `_ Kull, M., Silva Filho, T. M., & Flach, P. (2017). + + .. [8] Mario V. Wüthrich, Michael Merz (2023). + :doi:`"Statistical Foundations of Actuarial Learning and its Applications" + <10.1007/978-3-031-12409-9>` + Springer Actuarial diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index d55becb0c512a..1da5b337ad7a4 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -10,6 +10,23 @@ function raw specifications may not be enough to give full guidelines on their uses. For reference on concepts repeated across the API, see :ref:`glossary`. +:mod:`sklearn`: Settings and information tools +============================================== + +.. automodule:: sklearn + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + config_context + get_config + set_config + show_versions :mod:`sklearn.base`: Base classes and utility functions ======================================================= @@ -34,7 +51,9 @@ Base classes base.DensityMixin base.RegressorMixin base.TransformerMixin + base.MetaEstimatorMixin base.OneToOneFeatureMixin + base.OutlierMixin base.ClassNamePrefixFeaturesOutMixin feature_selection.SelectorMixin @@ -49,10 +68,6 @@ Functions base.clone base.is_classifier base.is_regressor - config_context - get_config - set_config - show_versions .. _calibration_ref: @@ -104,6 +119,7 @@ Classes cluster.AgglomerativeClustering cluster.Birch cluster.DBSCAN + cluster.HDBSCAN cluster.FeatureAgglomeration cluster.KMeans cluster.BisectingKMeans @@ -147,7 +163,7 @@ details. .. currentmodule:: sklearn .. 
autosummary:: - :toctree: generated + :toctree: generated/ :template: class.rst compose.ColumnTransformer @@ -193,6 +209,7 @@ details. covariance.empirical_covariance covariance.graphical_lasso covariance.ledoit_wolf + covariance.ledoit_wolf_shrinkage covariance.oas covariance.shrunk_covariance @@ -351,7 +368,7 @@ Samples generator .. currentmodule:: sklearn .. autosummary:: - :toctree: generated + :toctree: generated/ :template: class.rst discriminant_analysis.LinearDiscriminantAnalysis @@ -443,6 +460,7 @@ Samples generator exceptions.DataDimensionalityWarning exceptions.EfficiencyWarning exceptions.FitFailedWarning + exceptions.InconsistentVersionWarning exceptions.NotFittedError exceptions.UndefinedMetricWarning @@ -459,7 +477,6 @@ Samples generator .. autosummary:: :toctree: generated/ - experimental.enable_hist_gradient_boosting experimental.enable_iterative_imputer experimental.enable_halving_search_cv @@ -588,7 +605,14 @@ From text gaussian_process.GaussianProcessClassifier gaussian_process.GaussianProcessRegressor -Kernels: +Kernels +------- + +.. automodule:: sklearn.gaussian_process.kernels + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn .. autosummary:: :toctree: generated/ @@ -658,7 +682,7 @@ Plotting .. autosummary:: :toctree: generated/ - :template: class.rst + :template: display_only_from_estimator.rst inspection.DecisionBoundaryDisplay inspection.PartialDependenceDisplay @@ -683,7 +707,7 @@ Plotting isotonic.IsotonicRegression .. autosummary:: - :toctree: generated + :toctree: generated/ :template: function.rst isotonic.check_increasing @@ -861,9 +885,14 @@ Miscellaneous .. autosummary:: :toctree: generated/ - :template: function.rst + :template: classes.rst linear_model.PassiveAggressiveRegressor + +.. autosummary:: + :toctree: generated/ + :template: function.rst + linear_model.enet_path linear_model.lars_path linear_model.lars_path_gram @@ -953,6 +982,7 @@ details. metrics.classification_report metrics.cohen_kappa_score metrics.confusion_matrix + metrics.d2_log_loss_score metrics.dcg_score metrics.det_curve metrics.f1_score @@ -991,6 +1021,8 @@ details. metrics.median_absolute_error metrics.mean_absolute_percentage_error metrics.r2_score + metrics.root_mean_squared_log_error + metrics.root_mean_squared_error metrics.mean_poisson_deviance metrics.mean_gamma_deviance metrics.mean_tweedie_deviance @@ -1121,7 +1153,7 @@ See the :ref:`visualizations` section of the user guide for further details. .. autosummary:: :toctree: generated/ - :template: class.rst + :template: display_all_class_methods.rst metrics.ConfusionMatrixDisplay metrics.DetCurveDisplay @@ -1217,6 +1249,17 @@ Hyper-parameter optimizers model_selection.RandomizedSearchCV model_selection.HalvingRandomSearchCV +Post-fit model tuning +--------------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + model_selection.FixedThresholdClassifier + model_selection.TunedThresholdClassifierCV Model validation ---------------- @@ -1241,9 +1284,10 @@ Visualization .. autosummary:: :toctree: generated/ - :template: class.rst + :template: display_only_from_estimator.rst model_selection.LearningCurveDisplay + model_selection.ValidationCurveDisplay .. _multiclass_ref: @@ -1259,7 +1303,7 @@ Visualization .. currentmodule:: sklearn .. autosummary:: - :toctree: generated + :toctree: generated/ :template: class.rst multiclass.OneVsRestClassifier @@ -1437,6 +1481,7 @@ details. 
preprocessing.RobustScaler preprocessing.SplineTransformer preprocessing.StandardScaler + preprocessing.TargetEncoder .. autosummary:: :toctree: generated/ @@ -1600,40 +1645,125 @@ Plotting :toctree: generated/ :template: function.rst - utils.arrayfuncs.min_pos utils.as_float_array utils.assert_all_finite + utils.deprecated + utils.estimator_html_repr + utils.gen_batches + utils.gen_even_slices + utils.indexable + utils.murmurhash3_32 + utils.resample + utils._safe_indexing + utils.safe_mask + utils.safe_sqr + utils.shuffle + +Input and parameter validation +------------------------------ + +.. automodule:: sklearn.utils.validation + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + utils.check_X_y utils.check_array utils.check_scalar utils.check_consistent_length utils.check_random_state + utils.validation.check_is_fitted + utils.validation.check_memory + utils.validation.check_symmetric + utils.validation.column_or_1d + utils.validation.has_fit_parameter + +Utilities used in meta-estimators +--------------------------------- + +.. automodule:: sklearn.utils.metaestimators + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + utils.metaestimators.available_if + +Utilities to handle weights based on class labels +------------------------------------------------- + +.. automodule:: sklearn.utils.class_weight + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + utils.class_weight.compute_class_weight utils.class_weight.compute_sample_weight - utils.deprecated - utils.estimator_checks.check_estimator - utils.estimator_checks.parametrize_with_checks - utils.estimator_html_repr + +Utilities to deal with multiclass target in classifiers +------------------------------------------------------- + +.. automodule:: sklearn.utils.multiclass + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + utils.multiclass.type_of_target + utils.multiclass.is_multilabel + utils.multiclass.unique_labels + +Utilities for optimal mathematical operations +--------------------------------------------- + +.. automodule:: sklearn.utils.extmath + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + utils.extmath.safe_sparse_dot utils.extmath.randomized_range_finder utils.extmath.randomized_svd utils.extmath.fast_logdet utils.extmath.density utils.extmath.weighted_mode - utils.gen_batches - utils.gen_even_slices - utils.graph.single_source_shortest_path_length - utils.indexable - utils.metaestimators.available_if - utils.multiclass.type_of_target - utils.multiclass.is_multilabel - utils.multiclass.unique_labels - utils.murmurhash3_32 - utils.resample - utils._safe_indexing - utils.safe_mask - utils.safe_sqr - utils.shuffle + +Utilities to work with sparse matrices and arrays +------------------------------------------------- + +.. automodule:: sklearn.utils.sparsefuncs + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. 
autosummary:: + :toctree: generated/ + :template: function.rst + utils.sparsefuncs.incr_mean_variance_axis utils.sparsefuncs.inplace_column_scale utils.sparsefuncs.inplace_row_scale @@ -1641,16 +1771,98 @@ Plotting utils.sparsefuncs.inplace_swap_column utils.sparsefuncs.mean_variance_axis utils.sparsefuncs.inplace_csr_column_scale + +.. automodule:: sklearn.utils.sparsefuncs_fast + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + utils.sparsefuncs_fast.inplace_csr_row_normalize_l1 utils.sparsefuncs_fast.inplace_csr_row_normalize_l2 + +Utilities to work with graphs +----------------------------- + +.. automodule:: sklearn.utils.graph + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + utils.graph.single_source_shortest_path_length + +Utilities for random sampling +----------------------------- + +.. automodule:: sklearn.utils.random + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + utils.random.sample_without_replacement - utils.validation.check_is_fitted - utils.validation.check_memory - utils.validation.check_symmetric - utils.validation.column_or_1d - utils.validation.has_fit_parameter -Specific utilities to list scikit-learn components: + +Utilities to operate on arrays +------------------------------ + +.. automodule:: sklearn.utils.arrayfuncs + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + utils.arrayfuncs.min_pos + +Metadata routing +---------------- + +.. automodule:: sklearn.utils.metadata_routing + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + utils.metadata_routing.get_routing_for_object + utils.metadata_routing.process_routing + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + utils.metadata_routing.MetadataRouter + utils.metadata_routing.MetadataRequest + utils.metadata_routing.MethodMapping + +Scikit-learn object discovery +----------------------------- + +.. automodule:: sklearn.utils.discovery + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn .. autosummary:: :toctree: generated/ @@ -1660,24 +1872,45 @@ Specific utilities to list scikit-learn components: utils.discovery.all_displays utils.discovery.all_functions -Utilities from joblib: +Scikit-learn compatibility checker +---------------------------------- + +.. automodule:: sklearn.utils.estimator_checks + :no-members: + :no-inherited-members: + +.. currentmodule:: sklearn .. autosummary:: :toctree: generated/ :template: function.rst - utils.parallel_backend - utils.register_parallel_backend + utils.estimator_checks.check_estimator + utils.estimator_checks.parametrize_with_checks +Utilities for parallel computing +-------------------------------- -Recently deprecated -=================== +.. automodule:: sklearn.utils.parallel + :no-members: + :no-inherited-members: -To be removed in 1.3 --------------------- +.. currentmodule:: sklearn .. autosummary:: :toctree: generated/ :template: function.rst - utils.metaestimators.if_delegate_has_method + utils.parallel.delayed + utils.parallel_backend + utils.register_parallel_backend + +.. 
autosummary::
+    :toctree: generated/
+    :template: class.rst
+
+    utils.parallel.Parallel
+
+
+Recently deprecated
+===================
diff --git a/doc/modules/classification_threshold.rst b/doc/modules/classification_threshold.rst
new file mode 100644
index 0000000000000..712a094a43246
--- /dev/null
+++ b/doc/modules/classification_threshold.rst
@@ -0,0 +1,156 @@
+.. currentmodule:: sklearn.model_selection
+
+.. _TunedThresholdClassifierCV:
+
+==================================================
+Tuning the decision threshold for class prediction
+==================================================
+
+Classification is best divided into two parts:
+
+* the statistical problem of learning a model to predict, ideally, class probabilities;
+* the decision problem of taking concrete action based on those probability predictions.
+
+Let's take a straightforward example related to weather forecasting: the first point is
+related to answering "what is the chance that it will rain tomorrow?" while the second
+point is related to answering "should I take an umbrella tomorrow?".
+
+When it comes to the scikit-learn API, the first point is addressed by providing scores
+using :term:`predict_proba` or :term:`decision_function`. The former returns conditional
+probability estimates :math:`P(y|X)` for each class, while the latter returns a decision
+score for each class.
+
+The decisions, corresponding to class labels, are obtained with :term:`predict`. In
+binary classification, a decision rule or action is then defined by thresholding the
+scores, leading to the prediction of a single class label for each sample. For binary
+classification in scikit-learn, class label predictions are obtained by hard-coded
+cut-off rules: a positive class is predicted when the conditional probability
+:math:`P(y|X)` is greater than 0.5 (obtained with :term:`predict_proba`) or if the
+decision score is greater than 0 (obtained with :term:`decision_function`).
+
+Here, we show an example that illustrates the relation between conditional
+probability estimates :math:`P(y|X)` and class labels::
+
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.tree import DecisionTreeClassifier
+    >>> X, y = make_classification(random_state=0)
+    >>> classifier = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)
+    >>> classifier.predict_proba(X[:4])
+    array([[0.94     , 0.06     ],
+           [0.94     , 0.06     ],
+           [0.0416..., 0.9583...],
+           [0.0416..., 0.9583...]])
+    >>> classifier.predict(X[:4])
+    array([0, 0, 1, 1])
+
+While these hard-coded rules might at first seem reasonable as default behavior, they
+are most certainly not ideal for most use cases. Let's illustrate with an example.
+
+Consider a scenario where a predictive model is being deployed to assist
+physicians in detecting tumors. In this setting, physicians will most likely be
+interested in identifying all patients with cancer, without missing anyone, so that
+they can provide them with the right treatment. In other words, physicians prioritize
+achieving a high recall rate. This emphasis on recall comes, of course, with the
+trade-off of potentially more false-positive predictions, reducing the precision of
+the model. That is a risk physicians are willing to take because the cost of a missed
+cancer is much higher than the cost of further diagnostic tests.
Consequently, when it +comes to deciding whether to classify a patient as having cancer or not, it may be more +beneficial to classify them as positive for cancer when the conditional probability +estimate is much lower than 0.5. + +Post-tuning the decision threshold +================================== + +One solution to address the problem stated in the introduction is to tune the decision +threshold of the classifier once the model has been trained. The +:class:`~sklearn.model_selection.TunedThresholdClassifierCV` tunes this threshold using +an internal cross-validation. The optimum threshold is chosen to maximize a given +metric. + +The following image illustrates the tuning of the decision threshold for a gradient +boosting classifier. While the vanilla and tuned classifiers provide the same +:term:`predict_proba` outputs and thus the same Receiver Operating Characteristic (ROC) +and Precision-Recall curves, the class label predictions differ because of the tuned +decision threshold. The vanilla classifier predicts the class of interest for a +conditional probability greater than 0.5 while the tuned classifier predicts the class +of interest for a very low probability (around 0.02). This decision threshold optimizes +a utility metric defined by the business (in this case an insurance company). + +.. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cost_sensitive_learning_002.png + :target: ../auto_examples/model_selection/plot_cost_sensitive_learning.html + :align: center + +Options to tune the decision threshold +-------------------------------------- + +The decision threshold can be tuned through different strategies controlled by the +parameter `scoring`. + +One way to tune the threshold is by maximizing a pre-defined scikit-learn metric. These +metrics can be found by calling the function :func:`~sklearn.metrics.get_scorer_names`. +By default, the balanced accuracy is the metric used but be aware that one should choose +a meaningful metric for their use case. + +.. note:: + + It is important to notice that these metrics come with default parameters, notably + the label of the class of interest (i.e. `pos_label`). Thus, if this label is not + the right one for your application, you need to define a scorer and pass the right + `pos_label` (and additional parameters) using the + :func:`~sklearn.metrics.make_scorer`. Refer to :ref:`scoring` to get + information to define your own scoring function. For instance, we show how to pass + the information to the scorer that the label of interest is `0` when maximizing the + :func:`~sklearn.metrics.f1_score`:: + + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.model_selection import TunedThresholdClassifierCV + >>> from sklearn.metrics import make_scorer, f1_score + >>> X, y = make_classification( + ... n_samples=1_000, weights=[0.1, 0.9], random_state=0) + >>> pos_label = 0 + >>> scorer = make_scorer(f1_score, pos_label=pos_label) + >>> base_model = LogisticRegression() + >>> model = TunedThresholdClassifierCV(base_model, scoring=scorer) + >>> scorer(model.fit(X, y), X, y) + 0.88... + >>> # compare it with the internal score found by cross-validation + >>> model.best_score_ + 0.86... + +Important notes regarding the internal cross-validation +------------------------------------------------------- + +By default :class:`~sklearn.model_selection.TunedThresholdClassifierCV` uses a 5-fold +stratified cross-validation to tune the decision threshold. 
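+
+As a quick, minimal sketch of this default behaviour (reusing a synthetic dataset
+similar to the one above; the selected threshold and score depend on the data and are
+not reproduced here):
+
+.. code-block:: python
+
+    from sklearn.datasets import make_classification
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.model_selection import TunedThresholdClassifierCV
+
+    X, y = make_classification(n_samples=1_000, weights=[0.1, 0.9], random_state=0)
+
+    # With the default parameters, the decision threshold is tuned with a 5-fold
+    # stratified cross-validation so as to maximize the balanced accuracy.
+    model = TunedThresholdClassifierCV(LogisticRegression()).fit(X, y)
+
+    print(model.best_threshold_)  # decision threshold selected internally
+    print(model.best_score_)      # mean cross-validated balanced accuracy
+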
The parameter `cv` controls
+the cross-validation strategy. It is possible to bypass cross-validation by
+setting `cv="prefit"` and providing a fitted classifier. In this case, the decision
+threshold is tuned on the data provided to the `fit` method.
+
+However, you should be extremely careful when using this option. You should never use
+the same data for training the classifier and tuning the decision threshold due to the
+risk of overfitting. Refer to the following example section for more details (cf.
+:ref:`TunedThresholdClassifierCV_no_cv`). If you have limited resources, consider
+passing a float to `cv` to use a single internal train-test split.
+
+The option `cv="prefit"` should only be used when the provided classifier was already
+trained, and you just want to find the best decision threshold using a new validation
+set.
+
+.. _FixedThresholdClassifier:
+
+Manually setting the decision threshold
+---------------------------------------
+
+The previous sections discussed strategies to find an optimal decision threshold. It is
+also possible to manually set the decision threshold using the class
+:class:`~sklearn.model_selection.FixedThresholdClassifier`.
+
+Examples
+--------
+
+- See the example entitled
+  :ref:`sphx_glr_auto_examples_model_selection_plot_tuned_decision_threshold.py`,
+  to get insights on the post-tuning of the decision threshold.
+- See the example entitled
+  :ref:`sphx_glr_auto_examples_model_selection_plot_cost_sensitive_learning.py`,
+  to learn about cost-sensitive learning and decision threshold tuning.
diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst
index 5ca3a6f46b672..ed27b369171e5 100644
--- a/doc/modules/clustering.rst
+++ b/doc/modules/clustering.rst
@@ -93,6 +93,13 @@ Overview of clustering methods
        transductive
      - Distances between nearest points
 
+   * - :ref:`HDBSCAN `
+     - minimum cluster membership, minimum point neighbors
+     - large ``n_samples``, medium ``n_clusters``
+     - Non-flat geometry, uneven cluster sizes, outlier removal,
+       transductive, hierarchical, variable cluster density
+     - Distances between nearest points
+
    * - :ref:`OPTICS `
      - minimum cluster membership
      - Very large ``n_samples``, large ``n_clusters``
@@ -170,11 +177,15 @@ It suffers from various drawbacks:
 k-means clustering can alleviate this problem and speed up the
 computations.
 
-.. image:: ../auto_examples/cluster/images/sphx_glr_plot_kmeans_assumptions_001.png
+.. image:: ../auto_examples/cluster/images/sphx_glr_plot_kmeans_assumptions_002.png
    :target: ../auto_examples/cluster/plot_kmeans_assumptions.html
    :align: center
    :scale: 50
 
+For more detailed descriptions of the issues shown above and how to address them,
+refer to the examples :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`
+and :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`.
+
 K-means is often referred to as Lloyd's algorithm. In basic terms, the
 algorithm has three steps. The first step chooses the initial centroids, with
 the most basic method being to choose :math:`k` samples from the dataset
@@ -211,7 +222,9 @@ initializations of the centroids. One method to help address this issue is the
 k-means++ initialization scheme, which has been implemented in scikit-learn
 (use the ``init='k-means++'`` parameter). This initializes the centroids to be
 (generally) distant from each other, leading to probably better results than
-random initialization, as shown in the reference.
+random initialization, as shown in the reference.
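+
+As a minimal sketch of both ways of using the k-means++ scheme discussed here (the toy
+data and parameter values below are only illustrative):
+
+.. code-block:: python
+
+    from sklearn.cluster import KMeans, kmeans_plusplus
+    from sklearn.datasets import make_blobs
+
+    X, _ = make_blobs(n_samples=300, centers=4, random_state=0)
+
+    # 'k-means++' is the default initialization of KMeans; it is spelled out here
+    # for clarity.
+    km = KMeans(n_clusters=4, init="k-means++", n_init=10, random_state=0).fit(X)
+
+    # The seeding procedure can also be called on its own, for instance to provide
+    # starting points for another clustering algorithm.
+    centers, indices = kmeans_plusplus(X, n_clusters=4, random_state=0)
+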
For a detailed example of
+comparing different initialization schemes, refer to
+:ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`.
 
 K-means++ can also be called independently to select seeds for other clustering
 algorithms, see :func:`sklearn.cluster.kmeans_plusplus` for details
@@ -224,7 +237,17 @@ weight of 2 to a sample is equivalent to adding a duplicate of that sample to
 the dataset :math:`X`.
 
 K-means can be used for vector quantization. This is achieved using the
-transform method of a trained model of :class:`KMeans`.
+``transform`` method of a trained model of :class:`KMeans`. For an example of
+performing vector quantization on an image, refer to
+:ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py`.
+
+.. topic:: Examples:
+
+  * :ref:`sphx_glr_auto_examples_cluster_plot_cluster_iris.py`: Example usage of
+    :class:`KMeans` using the iris dataset
+
+  * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering
+    using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data
 
 Low-level parallelism
 ---------------------
@@ -236,17 +259,22 @@ threads, please refer to our :ref:`parallelism` notes.
 
 .. topic:: Examples:
 
- * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating when
-   k-means performs intuitively and when it does not
- * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering handwritten digits
+ * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating
+   when k-means performs intuitively and when it does not
+ * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering
+   handwritten digits
 
-.. topic:: References:
 
- * `"k-means++: The advantages of careful seeding"
-   `_
-   Arthur, David, and Sergei Vassilvitskii,
-   *Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete
-   algorithms*, Society for Industrial and Applied Mathematics (2007)
+|details-start|
+**References**
+|details-split|
+
+* `"k-means++: The advantages of careful seeding"
+  `_ Arthur, David, and
+  Sergei Vassilvitskii, *Proceedings of the eighteenth annual ACM-SIAM symposium
+  on Discrete algorithms*, Society for Industrial and Applied Mathematics (2007)
+
+|details-end|
 
 .. _mini_batch_kmeans:
 
@@ -284,21 +312,22 @@ small, as shown in the example and cited reference.
 
 .. topic:: Examples:
 
- * :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`: Comparison of KMeans and
-   MiniBatchKMeans
+ * :ref:`sphx_glr_auto_examples_cluster_plot_mini_batch_kmeans.py`: Comparison of
+   :class:`KMeans` and :class:`MiniBatchKMeans`
 
- * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering using sparse
-   MiniBatchKMeans
+ * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering
+   using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data
 
- * :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py`
 
+|details-start|
+**References**
+|details-split|
+* `"Web Scale K-Means clustering"
+  `_
+  D. Sculley, *Proceedings of the 19th international conference on World
+  wide web* (2010)
 
-.. topic:: References:
-
- * `"Web Scale K-Means clustering"
-   `_
-   D. Sculley, *Proceedings of the 19th international conference on World
-   wide web* (2010)
+|details-end|
 
 .. _affinity_propagation:
 
@@ -335,53 +364,57 @@ convergence. Further, the memory complexity is of the order
 sparse similarity matrix is used. This makes Affinity Propagation most
 appropriate for small to medium sized datasets.
 
-.. 
topic:: Examples: +|details-start| +**Algorithm description** +|details-split| - * :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity - Propagation on a synthetic 2D datasets with 3 classes. - - * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity Propagation on - Financial time series to find groups of companies - - -**Algorithm description:** The messages sent between points belong to one of two categories. The first is -the responsibility :math:`r(i, k)`, -which is the accumulated evidence that sample :math:`k` -should be the exemplar for sample :math:`i`. -The second is the availability :math:`a(i, k)` -which is the accumulated evidence that sample :math:`i` -should choose sample :math:`k` to be its exemplar, -and considers the values for all other samples that :math:`k` should -be an exemplar. In this way, exemplars are chosen by samples if they are (1) -similar enough to many samples and (2) chosen by many samples to be -representative of themselves. - -More formally, the responsibility of a sample :math:`k` -to be the exemplar of sample :math:`i` is given by: +the responsibility :math:`r(i, k)`, which is the accumulated evidence that +sample :math:`k` should be the exemplar for sample :math:`i`. The second is the +availability :math:`a(i, k)` which is the accumulated evidence that sample +:math:`i` should choose sample :math:`k` to be its exemplar, and considers the +values for all other samples that :math:`k` should be an exemplar. In this way, +exemplars are chosen by samples if they are (1) similar enough to many samples +and (2) chosen by many samples to be representative of themselves. + +More formally, the responsibility of a sample :math:`k` to be the exemplar of +sample :math:`i` is given by: .. math:: r(i, k) \leftarrow s(i, k) - max [ a(i, k') + s(i, k') \forall k' \neq k ] Where :math:`s(i, k)` is the similarity between samples :math:`i` and :math:`k`. -The availability of sample :math:`k` -to be the exemplar of sample :math:`i` is given by: +The availability of sample :math:`k` to be the exemplar of sample :math:`i` is +given by: .. math:: - a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', k)}] + a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', + k)}] -To begin with, all values for :math:`r` and :math:`a` are set to zero, -and the calculation of each iterates until convergence. -As discussed above, in order to avoid numerical oscillations when updating the -messages, the damping factor :math:`\lambda` is introduced to iteration process: +To begin with, all values for :math:`r` and :math:`a` are set to zero, and the +calculation of each iterates until convergence. As discussed above, in order to +avoid numerical oscillations when updating the messages, the damping factor +:math:`\lambda` is introduced to iteration process: .. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) .. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) where :math:`t` indicates the iteration times. +|details-end| + + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity + Propagation on a synthetic 2D datasets with 3 classes. + + * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity + Propagation on Financial time series to find groups of companies + + .. 
_mean_shift: Mean Shift @@ -392,22 +425,43 @@ for centroids to be the mean of the points within a given region. These candidates are then filtered in a post-processing stage to eliminate near-duplicates to form the final set of centroids. -Given a candidate centroid :math:`x_i` for iteration :math:`t`, the candidate -is updated according to the following equation: +|details-start| +**Mathematical details** +|details-split| + +The position of centroid candidates is iteratively adjusted using a technique +called hill climbing, which finds local maxima of the estimated probability +density. Given a candidate centroid :math:`x` for iteration :math:`t`, the +candidate is updated according to the following equation: + +.. math:: + + x^{t+1} = x^t + m(x^t) + +Where :math:`m` is the *mean shift* vector that is computed for each centroid +that points towards a region of the maximum increase in the density of points. +To compute :math:`m` we define :math:`N(x)` as the neighborhood of samples +within a given distance around :math:`x`. Then :math:`m` is computed using the +following equation, effectively updating a centroid to be the mean of the +samples within its neighborhood: .. math:: - x_i^{t+1} = m(x_i^t) + m(x) = \frac{1}{|N(x)|} \sum_{x_j \in N(x)}x_j - x -Where :math:`N(x_i)` is the neighborhood of samples within a given distance -around :math:`x_i` and :math:`m` is the *mean shift* vector that is computed for each -centroid that points towards a region of the maximum increase in the density of points. -This is computed using the following equation, effectively updating a centroid -to be the mean of the samples within its neighborhood: +In general, the equation for :math:`m` depends on a kernel used for density +estimation. The generic formula is: .. math:: - m(x_i) = \frac{\sum_{x_j \in N(x_i)}K(x_j - x_i)x_j}{\sum_{x_j \in N(x_i)}K(x_j - x_i)} + m(x) = \frac{\sum_{x_j \in N(x)}K(x_j - x)x_j}{\sum_{x_j \in N(x)}K(x_j - + x)} - x + +In our implementation, :math:`K(x)` is equal to 1 if :math:`x` is small enough +and is equal to 0 otherwise. Effectively :math:`K(y - x)` indicates whether +:math:`y` is in the neighborhood of :math:`x`. + +|details-end| The algorithm automatically sets the number of clusters, instead of relying on a parameter ``bandwidth``, which dictates the size of the region to search through. @@ -431,15 +485,19 @@ given sample. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift clustering - on a synthetic 2D datasets with 3 classes. + * :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift + clustering on a synthetic 2D datasets with 3 classes. -.. topic:: References: - * :doi:`"Mean shift: A robust approach toward feature space analysis" - <10.1109/34.1000236>` - D. Comaniciu and P. Meer, *IEEE Transactions on Pattern Analysis and Machine Intelligence* (2002) +|details-start| +**References** +|details-split| + +* :doi:`"Mean shift: A robust approach toward feature space analysis" + <10.1109/34.1000236>` D. Comaniciu and P. Meer, *IEEE Transactions on Pattern + Analysis and Machine Intelligence* (2002) +|details-end| .. _spectral_clustering: @@ -491,23 +549,24 @@ computed using a function of a gradient of the image. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting objects - from a noisy background using spectral clustering. + * :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting + objects from a noisy background using spectral clustering. 
- * :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral clustering - to split the image of coins in regions. + * :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral + clustering to split the image of coins in regions. .. |coin_kmeans| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_001.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 .. |coin_discretize| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_002.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 .. |coin_cluster_qr| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_003.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 + Different label assignment strategies ------------------------------------- @@ -529,14 +588,18 @@ below. |coin_kmeans| |coin_discretize| |coin_cluster_qr| ================================ ================================ ================================ -.. topic:: References: +|details-start| +**References** +|details-split| - * `"Multiclass spectral clustering" - `_ - Stella X. Yu, Jianbo Shi, 2003 +* `"Multiclass spectral clustering" + `_ + Stella X. Yu, Jianbo Shi, 2003 - * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` - Anil Damle, Victor Minden, Lexing Ying, 2019 +* :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` + Anil Damle, Victor Minden, Lexing Ying, 2019 + +|details-end| .. _spectral_clustering_graph: @@ -552,28 +615,28 @@ graph, and SpectralClustering is initialized with `affinity='precomputed'`:: ... assign_labels='discretize') >>> sc.fit_predict(adjacency_matrix) # doctest: +SKIP -.. topic:: References: +|details-start| +**References** +|details-split| - * :doi:`"A Tutorial on Spectral Clustering" - <10.1007/s11222-007-9033-z>` - Ulrike von Luxburg, 2007 +* :doi:`"A Tutorial on Spectral Clustering" <10.1007/s11222-007-9033-z>` Ulrike + von Luxburg, 2007 - * :doi:`"Normalized cuts and image segmentation" - <10.1109/34.868688>` - Jianbo Shi, Jitendra Malik, 2000 +* :doi:`"Normalized cuts and image segmentation" <10.1109/34.868688>` Jianbo + Shi, Jitendra Malik, 2000 - * `"A Random Walks View of Spectral Segmentation" - `_ - Marina Meila, Jianbo Shi, 2001 +* `"A Random Walks View of Spectral Segmentation" + `_ + Marina Meila, Jianbo Shi, 2001 - * `"On Spectral Clustering: Analysis and an algorithm" - `_ - Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 +* `"On Spectral Clustering: Analysis and an algorithm" + `_ + Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 - * :arxiv:`"Preconditioned Spectral Clustering for Stochastic - Block Partition Streaming Graph Challenge" - <1708.07481>` - David Zhuzhunashvili, Andrew Knyazev +* :arxiv:`"Preconditioned Spectral Clustering for Stochastic Block Partition + Streaming Graph Challenge" <1708.07481>` David Zhuzhunashvili, Andrew Knyazev + +|details-end| .. _hierarchical_clustering: @@ -636,8 +699,12 @@ Single linkage can also perform well on non-globular data. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of the - different linkage strategies in a real dataset. 
+ * :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of + the different linkage strategies in a real dataset. + + * :ref:`sphx_glr_auto_examples_cluster_plot_linkage_comparison.py`: exploration of + the different linkage strategies in toy datasets. + Visualization of cluster hierarchy ---------------------------------- @@ -650,6 +717,9 @@ of the data, though more so in the case of small sample sizes. :target: ../auto_examples/cluster/plot_agglomerative_dendrogram.html :scale: 42 +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_dendrogram.py` Adding connectivity constraints @@ -691,21 +761,6 @@ using :func:`sklearn.feature_extraction.image.grid_to_graph` to enable only merging of neighboring pixels on an image, as in the :ref:`coin ` example. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward clustering - to split the image of coins in regions. - - * :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example of - Ward algorithm on a swiss-roll, comparison of structured approaches - versus unstructured approaches. - - * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: - Example of dimensionality reduction with feature agglomeration based on - Ward hierarchical clustering. - - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` - .. warning:: **Connectivity constraints with single, average and complete linkage** Connectivity constraints and single, complete or average linkage can enhance @@ -733,6 +788,21 @@ enable only merging of neighboring pixels on an image, as in the :target: ../auto_examples/cluster/plot_agglomerative_clustering.html :scale: 38 +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward + clustering to split the image of coins in regions. + + * :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example + of Ward algorithm on a swiss-roll, comparison of structured approaches + versus unstructured approaches. + + * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: Example + of dimensionality reduction with feature agglomeration based on Ward + hierarchical clustering. + + * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` + Varying the metric ------------------- @@ -767,7 +837,8 @@ each class. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` + * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` + Bisecting K-Means ----------------- @@ -810,24 +881,26 @@ Difference between Bisecting K-Means and regular K-Means can be seen on example While the regular K-Means algorithm tends to create non-related clusters, clusters from Bisecting K-Means are well ordered and create quite a visible hierarchy. -.. 
topic:: References: - - * `"A Comparison of Document Clustering Techniques" - `_ - Michael Steinbach, George Karypis and Vipin Kumar, - Department of Computer Science and Egineering, University of Minnesota - (June 2000) - * `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog Data" - `_ - K.Abirami and Dr.P.Mayilvahanan, - International Journal of Emerging Technologies in Engineering Research (IJETER) - Volume 4, Issue 8, (August 2016) - * `"Bisecting K-means Algorithm Based on K-valued Self-determining - and Clustering Center Optimization" - `_ - Jian Di, Xinyue Gou - School of Control and Computer Engineering,North China Electric Power University, - Baoding, Hebei, China (August 2017) +|details-start| +**References** +|details-split| + +* `"A Comparison of Document Clustering Techniques" + `_ Michael + Steinbach, George Karypis and Vipin Kumar, Department of Computer Science and + Egineering, University of Minnesota (June 2000) +* `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog + Data" + `_ + K.Abirami and Dr.P.Mayilvahanan, International Journal of Emerging + Technologies in Engineering Research (IJETER) Volume 4, Issue 8, (August 2016) +* `"Bisecting K-means Algorithm Based on K-valued Self-determining and + Clustering Center Optimization" + `_ Jian Di, Xinyue Gou School + of Control and Computer Engineering,North China Electric Power University, + Baoding, Hebei, China (August 2017) + +|details-end| .. _dbscan: @@ -890,62 +963,180 @@ by black points below. * :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py` -.. topic:: Implementation - - The DBSCAN algorithm is deterministic, always generating the same clusters - when given the same data in the same order. However, the results can differ when - data is provided in a different order. First, even though the core samples - will always be assigned to the same clusters, the labels of those clusters - will depend on the order in which those samples are encountered in the data. - Second and more importantly, the clusters to which non-core samples are assigned - can differ depending on the data order. This would happen when a non-core sample - has a distance lower than ``eps`` to two core samples in different clusters. By the - triangular inequality, those two core samples must be more distant than - ``eps`` from each other, or they would be in the same cluster. The non-core - sample is assigned to whichever cluster is generated first in a pass - through the data, and so the results will depend on the data ordering. - - The current implementation uses ball trees and kd-trees - to determine the neighborhood of points, - which avoids calculating the full distance matrix - (as was done in scikit-learn versions before 0.14). - The possibility to use custom metrics is retained; - for details, see :class:`NearestNeighbors`. - -.. topic:: Memory consumption for large sample sizes - - This implementation is by default not memory efficient because it constructs - a full pairwise similarity matrix in the case where kd-trees or ball-trees cannot - be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` floats. - A couple of mechanisms for getting around this are: - - - Use :ref:`OPTICS ` clustering in conjunction with the - `extract_dbscan` method. OPTICS clustering also calculates the full - pairwise matrix, but only keeps one row in memory at a time (memory - complexity n). 
- - - A sparse radius neighborhood graph (where missing entries are presumed to - be out of eps) can be precomputed in a memory-efficient way and dbscan - can be run over this with ``metric='precomputed'``. See - :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`. - - - The dataset can be compressed, either by removing exact duplicates if - these occur in your data, or by using BIRCH. Then you only have a - relatively small number of representatives for a large number of points. - You can then provide a ``sample_weight`` when fitting DBSCAN. +|details-start| +**Implementation** +|details-split| + +The DBSCAN algorithm is deterministic, always generating the same clusters when +given the same data in the same order. However, the results can differ when +data is provided in a different order. First, even though the core samples will +always be assigned to the same clusters, the labels of those clusters will +depend on the order in which those samples are encountered in the data. Second +and more importantly, the clusters to which non-core samples are assigned can +differ depending on the data order. This would happen when a non-core sample +has a distance lower than ``eps`` to two core samples in different clusters. By +the triangular inequality, those two core samples must be more distant than +``eps`` from each other, or they would be in the same cluster. The non-core +sample is assigned to whichever cluster is generated first in a pass through the +data, and so the results will depend on the data ordering. + +The current implementation uses ball trees and kd-trees to determine the +neighborhood of points, which avoids calculating the full distance matrix (as +was done in scikit-learn versions before 0.14). The possibility to use custom +metrics is retained; for details, see :class:`NearestNeighbors`. + +|details-end| + +|details-start| +**Memory consumption for large sample sizes** +|details-split| + +This implementation is by default not memory efficient because it constructs a +full pairwise similarity matrix in the case where kd-trees or ball-trees cannot +be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` +floats. A couple of mechanisms for getting around this are: + +- Use :ref:`OPTICS ` clustering in conjunction with the `extract_dbscan` + method. OPTICS clustering also calculates the full pairwise matrix, but only + keeps one row in memory at a time (memory complexity n). + +- A sparse radius neighborhood graph (where missing entries are presumed to be + out of eps) can be precomputed in a memory-efficient way and dbscan can be run + over this with ``metric='precomputed'``. See + :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`. + +- The dataset can be compressed, either by removing exact duplicates if these + occur in your data, or by using BIRCH. Then you only have a relatively small + number of representatives for a large number of points. You can then provide a + ``sample_weight`` when fitting DBSCAN. + +|details-end| + +|details-start| +**References** +|details-split| + +* `A Density-Based Algorithm for Discovering Clusters in Large Spatial + Databases with Noise `_ + Ester, M., H. P. Kriegel, J. Sander, and X. Xu, In Proceedings of the 2nd + International Conference on Knowledge Discovery and Data Mining, Portland, OR, + AAAI Press, pp. 226–231. 1996 + +* :doi:`DBSCAN revisited, revisited: why and how you should (still) use DBSCAN. + <10.1145/3068335>` Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, + X. 
(2017). In ACM Transactions on Database Systems (TODS), 42(3), 19.
+
+|details-end|
+
+.. _hdbscan:
+
+HDBSCAN
+=======
+
+The :class:`HDBSCAN` algorithm can be seen as an extension of :class:`DBSCAN`
+and :class:`OPTICS`. Specifically, :class:`DBSCAN` assumes that the clustering
+criterion (i.e. density requirement) is *globally homogeneous*.
+In other words, :class:`DBSCAN` may struggle to successfully capture clusters
+with different densities.
+:class:`HDBSCAN` alleviates this assumption and explores all possible density
+scales by building an alternative representation of the clustering problem.
+
+.. note::
+
+   This implementation is adapted from the original implementation of HDBSCAN,
+   `scikit-learn-contrib/hdbscan `_ based on [LJ2017]_.
+
+.. topic:: Examples:
+
+  * :ref:`sphx_glr_auto_examples_cluster_plot_hdbscan.py`
+
+Mutual Reachability Graph
+-------------------------
+
+HDBSCAN first defines :math:`d_c(x_p)`, the *core distance* of a sample :math:`x_p`, as
+the distance to its `min_samples`-th nearest neighbor, counting itself. For example,
+if `min_samples=5` and :math:`x_*` is the 5th-nearest neighbor of :math:`x_p`
+then the core distance is:
+
+.. math:: d_c(x_p)=d(x_p, x_*).
+
+Next it defines :math:`d_m(x_p, x_q)`, the *mutual reachability distance* of two points
+:math:`x_p, x_q`, as:
+
+.. math:: d_m(x_p, x_q) = \max\{d_c(x_p), d_c(x_q), d(x_p, x_q)\}
+
+These two notions allow us to construct the *mutual reachability graph*
+:math:`G_{ms}` defined for a fixed choice of `min_samples` by associating each
+sample :math:`x_p` with a vertex of the graph, and thus edges between points
+:math:`x_p, x_q` are weighted by the mutual reachability distance
+:math:`d_m(x_p, x_q)` between them. We may build subsets of this graph, denoted as
+:math:`G_{ms,\varepsilon}`, by removing any edges with value greater than
+:math:`\varepsilon` from the original graph. Any points whose core distance is greater
+than :math:`\varepsilon` are at this stage marked as noise. The remaining points are
+then clustered by finding the connected components of this trimmed graph.
+
+.. note::
+
+   Taking the connected components of a trimmed graph :math:`G_{ms,\varepsilon}` is
+   equivalent to running DBSCAN* with `min_samples` and :math:`\varepsilon`. DBSCAN* is
+   a slightly modified version of DBSCAN mentioned in [CM2013]_.
+
+Hierarchical Clustering
+-----------------------
+HDBSCAN can be seen as an algorithm which performs DBSCAN* clustering across all
+values of :math:`\varepsilon`. As mentioned previously, this is equivalent to finding
+the connected components of the mutual reachability graphs for all values of
+:math:`\varepsilon`. To do this efficiently, HDBSCAN first extracts a minimum spanning
+tree (MST) from the fully-connected mutual reachability graph, then greedily cuts the
+edges with highest weight. An outline of the HDBSCAN algorithm is as follows:
+
+1. Extract the MST of :math:`G_{ms}`.
+2. Extend the MST by adding a "self edge" for each vertex, with weight equal
+   to the core distance of the underlying sample.
+3. Initialize a single cluster and label for the MST.
+4. Remove the edge with the greatest weight from the MST (ties are
+   removed simultaneously).
+5. Assign cluster labels to the connected components which contain the
+   end points of the now-removed edge. If the component does not have at least
+   one edge it is instead assigned a "null" label marking it as noise.
+6. Repeat 4-5 until there are no more connected components.
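+
+Before moving on, a minimal usage sketch of `min_samples`, discussed above, together
+with `min_cluster_size`, which is introduced below (the data and parameter values are
+only illustrative):
+
+.. code-block:: python
+
+    import numpy as np
+    from sklearn.cluster import HDBSCAN
+    from sklearn.datasets import make_blobs
+
+    # Two dense blobs and one much sparser blob, i.e. clusters of varying density.
+    X, _ = make_blobs(
+        n_samples=[200, 200, 50],
+        centers=[[0, 0], [3, 3], [10, 10]],
+        cluster_std=[0.3, 0.3, 2.0],
+        random_state=0,
+    )
+
+    hdb = HDBSCAN(min_samples=5, min_cluster_size=15).fit(X)
+
+    # Cluster labels; samples labelled -1 are treated as noise.
+    print(np.unique(hdb.labels_))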
+ +HDBSCAN is therefore able to obtain all possible partitions achievable by +DBSCAN* for a fixed choice of `min_samples` in a hierarchical fashion. +Indeed, this allows HDBSCAN to perform clustering across multiple densities +and as such it no longer needs :math:`\varepsilon` to be given as a hyperparameter. Instead +it relies solely on the choice of `min_samples`, which tends to be a more robust +hyperparameter. + +.. |hdbscan_ground_truth| image:: ../auto_examples/cluster/images/sphx_glr_plot_hdbscan_005.png + :target: ../auto_examples/cluster/plot_hdbscan.html + :scale: 75 +.. |hdbscan_results| image:: ../auto_examples/cluster/images/sphx_glr_plot_hdbscan_007.png + :target: ../auto_examples/cluster/plot_hdbscan.html + :scale: 75 + +.. centered:: |hdbscan_ground_truth| +.. centered:: |hdbscan_results| + +HDBSCAN can be smoothed with an additional hyperparameter `min_cluster_size` +which specifies that during the hierarchical clustering, components with fewer +than `minimum_cluster_size` many samples are considered noise. In practice, one +can set `minimum_cluster_size = min_samples` to couple the parameters and +simplify the hyperparameter space. .. topic:: References: - * `"A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases - with Noise" `_ - Ester, M., H. P. Kriegel, J. Sander, and X. Xu, - In Proceedings of the 2nd International Conference on Knowledge Discovery - and Data Mining, Portland, OR, AAAI Press, pp. 226–231. 1996 + .. [CM2013] Campello, R.J.G.B., Moulavi, D., Sander, J. (2013). Density-Based + Clustering Based on Hierarchical Density Estimates. In: Pei, J., Tseng, V.S., + Cao, L., Motoda, H., Xu, G. (eds) Advances in Knowledge Discovery and Data + Mining. PAKDD 2013. Lecture Notes in Computer Science(), vol 7819. Springer, + Berlin, Heidelberg. :doi:`Density-Based Clustering Based on Hierarchical + Density Estimates <10.1007/978-3-642-37456-2_14>` - * :doi:`"DBSCAN revisited, revisited: why and how you should (still) use DBSCAN." - <10.1145/3068335>` - Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017). - In ACM Transactions on Database Systems (TODS), 42(3), 19. + .. [LJ2017] L. McInnes and J. Healy, (2017). Accelerated Hierarchical Density + Based Clustering. In: IEEE International Conference on Data Mining Workshops + (ICDMW), 2017, pp. 33-42. :doi:`Accelerated Hierarchical Density Based + Clustering <10.1109/ICDMW.2017.12>` .. _optics: @@ -993,45 +1184,56 @@ represented as children of a larger parent cluster. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_optics.py` + * :ref:`sphx_glr_auto_examples_cluster_plot_optics.py` -.. topic:: Comparison with DBSCAN +|details-start| +**Comparison with DBSCAN** +|details-split| - The results from OPTICS ``cluster_optics_dbscan`` method and DBSCAN are - very similar, but not always identical; specifically, labeling of periphery - and noise points. This is in part because the first samples of each dense - area processed by OPTICS have a large reachability value while being close - to other points in their area, and will thus sometimes be marked as noise - rather than periphery. This affects adjacent points when they are - considered as candidates for being marked as either periphery or noise. +The results from OPTICS ``cluster_optics_dbscan`` method and DBSCAN are very +similar, but not always identical; specifically, labeling of periphery and noise +points. 
This is in part because the first samples of each dense area processed +by OPTICS have a large reachability value while being close to other points in +their area, and will thus sometimes be marked as noise rather than periphery. +This affects adjacent points when they are considered as candidates for being +marked as either periphery or noise. - Note that for any single value of ``eps``, DBSCAN will tend to have a - shorter run time than OPTICS; however, for repeated runs at varying ``eps`` - values, a single run of OPTICS may require less cumulative runtime than - DBSCAN. It is also important to note that OPTICS' output is close to - DBSCAN's only if ``eps`` and ``max_eps`` are close. +Note that for any single value of ``eps``, DBSCAN will tend to have a shorter +run time than OPTICS; however, for repeated runs at varying ``eps`` values, a +single run of OPTICS may require less cumulative runtime than DBSCAN. It is also +important to note that OPTICS' output is close to DBSCAN's only if ``eps`` and +``max_eps`` are close. -.. topic:: Computational Complexity +|details-end| - Spatial indexing trees are used to avoid calculating the full distance - matrix, and allow for efficient memory usage on large sets of samples. - Different distance metrics can be supplied via the ``metric`` keyword. +|details-start| +**Computational Complexity** +|details-split| - For large datasets, similar (but not identical) results can be obtained via - `HDBSCAN `_. The HDBSCAN implementation is - multithreaded, and has better algorithmic runtime complexity than OPTICS, - at the cost of worse memory scaling. For extremely large datasets that - exhaust system memory using HDBSCAN, OPTICS will maintain :math:`n` (as opposed - to :math:`n^2`) memory scaling; however, tuning of the ``max_eps`` parameter - will likely need to be used to give a solution in a reasonable amount of - wall time. +Spatial indexing trees are used to avoid calculating the full distance matrix, +and allow for efficient memory usage on large sets of samples. Different +distance metrics can be supplied via the ``metric`` keyword. -.. topic:: References: +For large datasets, similar (but not identical) results can be obtained via +:class:`HDBSCAN`. The HDBSCAN implementation is multithreaded, and has better +algorithmic runtime complexity than OPTICS, at the cost of worse memory scaling. +For extremely large datasets that exhaust system memory using HDBSCAN, OPTICS +will maintain :math:`n` (as opposed to :math:`n^2`) memory scaling; however, +tuning of the ``max_eps`` parameter will likely need to be used to give a +solution in a reasonable amount of wall time. + +|details-end| + +|details-start| +**References** +|details-split| + +* "OPTICS: ordering points to identify the clustering structure." Ankerst, + Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. In ACM Sigmod + Record, vol. 28, no. 2, pp. 49-60. ACM, 1999. - * "OPTICS: ordering points to identify the clustering structure." - Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. - In ACM Sigmod Record, vol. 28, no. 2, pp. 49-60. ACM, 1999. +|details-end| .. _birch: @@ -1067,60 +1269,75 @@ If ``n_clusters`` is set to None, the subclusters from the leaves are directly read off, otherwise a global clustering step labels these subclusters into global clusters (labels) and the samples are mapped to the global label of the nearest subcluster. 
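+
+A minimal sketch of the behaviour described above (toy data; the number of subclusters
+found depends on the ``threshold`` and ``branching_factor`` values, which are only
+illustrative here):
+
+.. code-block:: python
+
+    from sklearn.cluster import Birch
+    from sklearn.datasets import make_blobs
+
+    X, _ = make_blobs(n_samples=500, centers=4, random_state=0)
+
+    # n_clusters=None: labels are read directly from the leaf subclusters.
+    brc_leaves = Birch(threshold=0.5, branching_factor=50, n_clusters=None).fit(X)
+    print(brc_leaves.subcluster_centers_.shape)
+
+    # With an integer n_clusters, a global clustering step (agglomerative clustering
+    # on the subcluster centroids) maps the subclusters, and hence the samples, to
+    # the requested number of global clusters.
+    brc_global = Birch(threshold=0.5, branching_factor=50, n_clusters=4).fit(X)
+    print(len(set(brc_global.labels_)))
+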
-**Algorithm description:** +|details-start| +**Algorithm description** +|details-split| -- A new sample is inserted into the root of the CF Tree which is a CF Node. - It is then merged with the subcluster of the root, that has the smallest - radius after merging, constrained by the threshold and branching factor conditions. - If the subcluster has any child node, then this is done repeatedly till it reaches - a leaf. After finding the nearest subcluster in the leaf, the properties of this - subcluster and the parent subclusters are recursively updated. +- A new sample is inserted into the root of the CF Tree which is a CF Node. It + is then merged with the subcluster of the root, that has the smallest radius + after merging, constrained by the threshold and branching factor conditions. + If the subcluster has any child node, then this is done repeatedly till it + reaches a leaf. After finding the nearest subcluster in the leaf, the + properties of this subcluster and the parent subclusters are recursively + updated. - If the radius of the subcluster obtained by merging the new sample and the nearest subcluster is greater than the square of the threshold and if the - number of subclusters is greater than the branching factor, then a space is temporarily - allocated to this new sample. The two farthest subclusters are taken and - the subclusters are divided into two groups on the basis of the distance - between these subclusters. + number of subclusters is greater than the branching factor, then a space is + temporarily allocated to this new sample. The two farthest subclusters are + taken and the subclusters are divided into two groups on the basis of the + distance between these subclusters. -- If this split node has a parent subcluster and there is room - for a new subcluster, then the parent is split into two. If there is no room, - then this node is again split into two and the process is continued - recursively, till it reaches the root. +- If this split node has a parent subcluster and there is room for a new + subcluster, then the parent is split into two. If there is no room, then this + node is again split into two and the process is continued recursively, till it + reaches the root. +|details-end| + +|details-start| **BIRCH or MiniBatchKMeans?** +|details-split| + +- BIRCH does not scale very well to high dimensional data. As a rule of thumb if + ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans. +- If the number of instances of data needs to be reduced, or if one wants a + large number of subclusters either as a preprocessing step or otherwise, + BIRCH is more useful than MiniBatchKMeans. - - BIRCH does not scale very well to high dimensional data. As a rule of thumb if - ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans. - - If the number of instances of data needs to be reduced, or if one wants a - large number of subclusters either as a preprocessing step or otherwise, - BIRCH is more useful than MiniBatchKMeans. +.. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png + :target: ../auto_examples/cluster/plot_birch_vs_minibatchkmeans.html +|details-end| +|details-start| **How to use partial_fit?** +|details-split| To avoid the computation of global clustering, for every call of ``partial_fit`` the user is advised - 1. To set ``n_clusters=None`` initially - 2. Train all data by multiple calls to partial_fit. - 3. 
Set ``n_clusters`` to a required value using - ``brc.set_params(n_clusters=n_clusters)``. - 4. Call ``partial_fit`` finally with no arguments, i.e. ``brc.partial_fit()`` - which performs the global clustering. +1. To set ``n_clusters=None`` initially +2. Train all data by multiple calls to partial_fit. +3. Set ``n_clusters`` to a required value using + ``brc.set_params(n_clusters=n_clusters)``. +4. Call ``partial_fit`` finally with no arguments, i.e. ``brc.partial_fit()`` + which performs the global clustering. -.. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png - :target: ../auto_examples/cluster/plot_birch_vs_minibatchkmeans.html +|details-end| -.. topic:: References: +|details-start| +**References** +|details-split| - * Tian Zhang, Raghu Ramakrishnan, Maron Livny - BIRCH: An efficient data clustering method for large databases. - https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf +* Tian Zhang, Raghu Ramakrishnan, Maron Livny BIRCH: An efficient data + clustering method for large databases. + https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf - * Roberto Perdisci - JBirch - Java implementation of BIRCH clustering algorithm - https://code.google.com/archive/p/jbirch +* Roberto Perdisci JBirch - Java implementation of BIRCH clustering algorithm + https://code.google.com/archive/p/jbirch + +|details-end| .. _clustering_evaluation: @@ -1203,105 +1420,104 @@ will not necessarily be close to zero.:: -0.07... -Advantages -~~~~~~~~~~ - -- **Interpretability**: The unadjusted Rand index is proportional - to the number of sample pairs whose labels are the same in both - `labels_pred` and `labels_true`, or are different in both. +.. topic:: Advantages: -- **Random (uniform) label assignments have an adjusted Rand index - score close to 0.0** for any value of ``n_clusters`` and - ``n_samples`` (which is not the case for the unadjusted Rand index - or the V-measure for instance). + - **Interpretability**: The unadjusted Rand index is proportional to the + number of sample pairs whose labels are the same in both `labels_pred` and + `labels_true`, or are different in both. -- **Bounded range**: Lower values indicate different labelings, - similar clusterings have a high (adjusted or unadjusted) Rand index, - 1.0 is the perfect match score. The score range is [0, 1] for the - unadjusted Rand index and [-1, 1] for the adjusted Rand index. + - **Random (uniform) label assignments have an adjusted Rand index score close + to 0.0** for any value of ``n_clusters`` and ``n_samples`` (which is not the + case for the unadjusted Rand index or the V-measure for instance). -- **No assumption is made on the cluster structure**: The (adjusted or - unadjusted) Rand index can be used to compare all kinds of - clustering algorithms, and can be used to compare clustering - algorithms such as k-means which assumes isotropic blob shapes with - results of spectral clustering algorithms which can find cluster - with "folded" shapes. + - **Bounded range**: Lower values indicate different labelings, similar + clusterings have a high (adjusted or unadjusted) Rand index, 1.0 is the + perfect match score. The score range is [0, 1] for the unadjusted Rand index + and [-1, 1] for the adjusted Rand index. 
+ - **No assumption is made on the cluster structure**: The (adjusted or + unadjusted) Rand index can be used to compare all kinds of clustering + algorithms, and can be used to compare clustering algorithms such as k-means + which assumes isotropic blob shapes with results of spectral clustering + algorithms which can find cluster with "folded" shapes. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- Contrary to inertia, the **(adjusted or unadjusted) Rand index - requires knowledge of the ground truth classes** which is almost - never available in practice or requires manual assignment by human - annotators (as in the supervised learning setting). + - Contrary to inertia, the **(adjusted or unadjusted) Rand index requires + knowledge of the ground truth classes** which is almost never available in + practice or requires manual assignment by human annotators (as in the + supervised learning setting). - However (adjusted or unadjusted) Rand index can also be useful in a - purely unsupervised setting as a building block for a Consensus - Index that can be used for clustering model selection (TODO). + However (adjusted or unadjusted) Rand index can also be useful in a purely + unsupervised setting as a building block for a Consensus Index that can be + used for clustering model selection (TODO). -- The **unadjusted Rand index is often close to 1.0** even if the - clusterings themselves differ significantly. This can be understood - when interpreting the Rand index as the accuracy of element pair - labeling resulting from the clusterings: In practice there often is - a majority of element pairs that are assigned the ``different`` pair - label under both the predicted and the ground truth clustering - resulting in a high proportion of pair labels that agree, which - leads subsequently to a high score. + - The **unadjusted Rand index is often close to 1.0** even if the clusterings + themselves differ significantly. This can be understood when interpreting + the Rand index as the accuracy of element pair labeling resulting from the + clusterings: In practice there often is a majority of element pairs that are + assigned the ``different`` pair label under both the predicted and the + ground truth clustering resulting in a high proportion of pair labels that + agree, which leads subsequently to a high score. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: - Analysis of the impact of the dataset size on the value of - clustering measures for random assignments. + * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: + Analysis of the impact of the dataset size on the value of clustering measures + for random assignments. -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Mathematical formulation** +|details-split| -If C is a ground truth class assignment and K the clustering, let us -define :math:`a` and :math:`b` as: +If C is a ground truth class assignment and K the clustering, let us define +:math:`a` and :math:`b` as: -- :math:`a`, the number of pairs of elements that are in the same set - in C and in the same set in K +- :math:`a`, the number of pairs of elements that are in the same set in C and + in the same set in K -- :math:`b`, the number of pairs of elements that are in different sets - in C and in different sets in K +- :math:`b`, the number of pairs of elements that are in different sets in C and + in different sets in K The unadjusted Rand index is then given by: .. 
math:: \text{RI} = \frac{a + b}{C_2^{n_{samples}}} -where :math:`C_2^{n_{samples}}` is the total number of possible pairs -in the dataset. It does not matter if the calculation is performed on -ordered pairs or unordered pairs as long as the calculation is -performed consistently. +where :math:`C_2^{n_{samples}}` is the total number of possible pairs in the +dataset. It does not matter if the calculation is performed on ordered pairs or +unordered pairs as long as the calculation is performed consistently. -However, the Rand index does not guarantee that random label assignments -will get a value close to zero (esp. if the number of clusters is in -the same order of magnitude as the number of samples). +However, the Rand index does not guarantee that random label assignments will +get a value close to zero (esp. if the number of clusters is in the same order +of magnitude as the number of samples). To counter this effect we can discount the expected RI :math:`E[\text{RI}]` of random labelings by defining the adjusted Rand index as follows: .. math:: \text{ARI} = \frac{\text{RI} - E[\text{RI}]}{\max(\text{RI}) - E[\text{RI}]} -.. topic:: References +|details-end| - * `Comparing Partitions - `_ - L. Hubert and P. Arabie, Journal of Classification 1985 +|details-start| +**References** +|details-split| - * `Properties of the Hubert-Arabie adjusted Rand index - `_ - D. Steinley, Psychological Methods 2004 +* `Comparing Partitions + `_ L. Hubert and P. + Arabie, Journal of Classification 1985 - * `Wikipedia entry for the Rand index - `_ +* `Properties of the Hubert-Arabie adjusted Rand index + `_ D. Steinley, Psychological + Methods 2004 - * `Wikipedia entry for the adjusted Rand index - `_ +* `Wikipedia entry for the Rand index + `_ +* `Wikipedia entry for the adjusted Rand index + `_ + +|details-end| .. _mutual_info_score: @@ -1359,44 +1575,39 @@ Bad (e.g. independent labelings) have non-positive scores:: -0.10526... -Advantages -~~~~~~~~~~ - -- **Random (uniform) label assignments have a AMI score close to 0.0** - for any value of ``n_clusters`` and ``n_samples`` (which is not the - case for raw Mutual Information or the V-measure for instance). +.. topic:: Advantages: -- **Upper bound of 1**: Values close to zero indicate two label - assignments that are largely independent, while values close to one - indicate significant agreement. Further, an AMI of exactly 1 indicates - that the two label assignments are equal (with or without permutation). + - **Random (uniform) label assignments have a AMI score close to 0.0** for any + value of ``n_clusters`` and ``n_samples`` (which is not the case for raw + Mutual Information or the V-measure for instance). + - **Upper bound of 1**: Values close to zero indicate two label assignments + that are largely independent, while values close to one indicate significant + agreement. Further, an AMI of exactly 1 indicates that the two label + assignments are equal (with or without permutation). -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- Contrary to inertia, **MI-based measures require the knowledge - of the ground truth classes** while almost never available in practice or - requires manual assignment by human annotators (as in the supervised learning - setting). + - Contrary to inertia, **MI-based measures require the knowledge of the ground + truth classes** while almost never available in practice or requires manual + assignment by human annotators (as in the supervised learning setting). 
- However MI-based measures can also be useful in purely unsupervised setting as a - building block for a Consensus Index that can be used for clustering - model selection. - -- NMI and MI are not adjusted against chance. + However MI-based measures can also be useful in purely unsupervised setting + as a building block for a Consensus Index that can be used for clustering + model selection. + - NMI and MI are not adjusted against chance. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis of - the impact of the dataset size on the value of clustering measures - for random assignments. This example also includes the Adjusted Rand - Index. + * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis + of the impact of the dataset size on the value of clustering measures for + random assignments. This example also includes the Adjusted Rand Index. -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Mathematical formulation** +|details-split| Assume two label assignments (of the same N objects), :math:`U` and :math:`V`. Their entropy is the amount of uncertainty for a partition set, defined by: @@ -1430,63 +1641,62 @@ adjusted for chance and will tend to increase as the number of different labels between the label assignments. The expected value for the mutual information can be calculated using the -following equation [VEB2009]_. In this equation, -:math:`a_i = |U_i|` (the number of elements in :math:`U_i`) and -:math:`b_j = |V_j|` (the number of elements in :math:`V_j`). - +following equation [VEB2009]_. In this equation, :math:`a_i = |U_i|` (the number +of elements in :math:`U_i`) and :math:`b_j = |V_j|` (the number of elements in +:math:`V_j`). .. math:: E[\text{MI}(U,V)]=\sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \sum_{n_{ij}=(a_i+b_j-N)^+ - }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right) - \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})! - (N-a_i-b_j+n_{ij})!} + }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right) + \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})! + (N-a_i-b_j+n_{ij})!} -Using the expected value, the adjusted mutual information can then be -calculated using a similar form to that of the adjusted Rand index: +Using the expected value, the adjusted mutual information can then be calculated +using a similar form to that of the adjusted Rand index: .. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]} -For normalized mutual information and adjusted mutual information, the normalizing -value is typically some *generalized* mean of the entropies of each clustering. -Various generalized means exist, and no firm rules exist for preferring one over the -others. The decision is largely a field-by-field basis; for instance, in community -detection, the arithmetic mean is most common. Each -normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In our -implementation, this is controlled by the ``average_method`` parameter. - -Vinh et al. (2010) named variants of NMI and AMI by their averaging method [VEB2010]_. Their -'sqrt' and 'sum' averages are the geometric and arithmetic means; we use these -more broadly common names. +For normalized mutual information and adjusted mutual information, the +normalizing value is typically some *generalized* mean of the entropies of each +clustering. 
Various generalized means exist, and no firm rules exist for +preferring one over the others. The decision is largely a field-by-field basis; +for instance, in community detection, the arithmetic mean is most common. Each +normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In +our implementation, this is controlled by the ``average_method`` parameter. -.. topic:: References +Vinh et al. (2010) named variants of NMI and AMI by their averaging method +[VEB2010]_. Their 'sqrt' and 'sum' averages are the geometric and arithmetic +means; we use these more broadly common names. - * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles – a - knowledge reuse framework for combining multiple partitions". Journal of - Machine Learning Research 3: 583–617. - `doi:10.1162/153244303321897735 `_. +.. topic:: References: - * `Wikipedia entry for the (normalized) Mutual Information - `_ + * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles – a + knowledge reuse framework for combining multiple partitions". Journal of + Machine Learning Research 3: 583–617. `doi:10.1162/153244303321897735 + `_. - * `Wikipedia entry for the Adjusted Mutual Information - `_ + * `Wikipedia entry for the (normalized) Mutual Information + `_ - .. [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures - for clusterings comparison". Proceedings of the 26th Annual International - Conference on Machine Learning - ICML '09. - `doi:10.1145/1553374.1553511 `_. - ISBN 9781605585161. + * `Wikipedia entry for the Adjusted Mutual Information + `_ - .. [VEB2010] Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures for - Clusterings Comparison: Variants, Properties, Normalization and - Correction for Chance". JMLR - + .. [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures + for clusterings comparison". Proceedings of the 26th Annual International + Conference on Machine Learning - ICML '09. `doi:10.1145/1553374.1553511 + `_. ISBN + 9781605585161. - .. [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis of - community - detection algorithms on artificial networks". Scientific Reports 6: 30750. - `doi:10.1038/srep30750 `_. + .. [VEB2010] Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures + for Clusterings Comparison: Variants, Properties, Normalization and + Correction for Chance". JMLR + + .. [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis + of community detection algorithms on artificial networks". Scientific + Reports 6: 30750. `doi:10.1038/srep30750 + `_. +|details-end| .. _homogeneity_completeness: @@ -1568,55 +1778,52 @@ homogeneous but not complete:: homogeneity_score(a, b) == completeness_score(b, a) -Advantages -~~~~~~~~~~ - -- **Bounded scores**: 0.0 is as bad as it can be, 1.0 is a perfect score. +.. topic:: Advantages: -- Intuitive interpretation: clustering with bad V-measure can be - **qualitatively analyzed in terms of homogeneity and completeness** - to better feel what 'kind' of mistakes is done by the assignment. + - **Bounded scores**: 0.0 is as bad as it can be, 1.0 is a perfect score. -- **No assumption is made on the cluster structure**: can be used - to compare clustering algorithms such as k-means which assumes isotropic - blob shapes with results of spectral clustering algorithms which can - find cluster with "folded" shapes. 
+ - Intuitive interpretation: clustering with bad V-measure can be + **qualitatively analyzed in terms of homogeneity and completeness** to + better feel what 'kind' of mistakes is done by the assignment. + - **No assumption is made on the cluster structure**: can be used to compare + clustering algorithms such as k-means which assumes isotropic blob shapes + with results of spectral clustering algorithms which can find cluster with + "folded" shapes. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- The previously introduced metrics are **not normalized with regards to - random labeling**: this means that depending on the number of samples, - clusters and ground truth classes, a completely random labeling will - not always yield the same values for homogeneity, completeness and - hence v-measure. In particular **random labeling won't yield zero - scores especially when the number of clusters is large**. + - The previously introduced metrics are **not normalized with regards to + random labeling**: this means that depending on the number of samples, + clusters and ground truth classes, a completely random labeling will not + always yield the same values for homogeneity, completeness and hence + v-measure. In particular **random labeling won't yield zero scores + especially when the number of clusters is large**. - This problem can safely be ignored when the number of samples is more - than a thousand and the number of clusters is less than 10. **For - smaller sample sizes or larger number of clusters it is safer to use - an adjusted index such as the Adjusted Rand Index (ARI)**. + This problem can safely be ignored when the number of samples is more than a + thousand and the number of clusters is less than 10. **For smaller sample + sizes or larger number of clusters it is safer to use an adjusted index such + as the Adjusted Rand Index (ARI)**. -.. figure:: ../auto_examples/cluster/images/sphx_glr_plot_adjusted_for_chance_measures_001.png - :target: ../auto_examples/cluster/plot_adjusted_for_chance_measures.html - :align: center - :scale: 100 - -- These metrics **require the knowledge of the ground truth classes** while - almost never available in practice or requires manual assignment by - human annotators (as in the supervised learning setting). + .. figure:: ../auto_examples/cluster/images/sphx_glr_plot_adjusted_for_chance_measures_001.png + :target: ../auto_examples/cluster/plot_adjusted_for_chance_measures.html + :align: center + :scale: 100 + - These metrics **require the knowledge of the ground truth classes** while + almost never available in practice or requires manual assignment by human + annotators (as in the supervised learning setting). .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis of - the impact of the dataset size on the value of clustering measures - for random assignments. + * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis + of the impact of the dataset size on the value of clustering measures for + random assignments. -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Mathematical formulation** +|details-split| Homogeneity and completeness scores are formally given by: @@ -1624,8 +1831,8 @@ Homogeneity and completeness scores are formally given by: .. 
math:: c = 1 - \frac{H(K|C)}{H(K)} -where :math:`H(C|K)` is the **conditional entropy of the classes given -the cluster assignments** and is given by: +where :math:`H(C|K)` is the **conditional entropy of the classes given the +cluster assignments** and is given by: .. math:: H(C|K) = - \sum_{c=1}^{|C|} \sum_{k=1}^{|K|} \frac{n_{c,k}}{n} \cdot \log\left(\frac{n_{c,k}}{n_k}\right) @@ -1634,26 +1841,28 @@ and :math:`H(C)` is the **entropy of the classes** and is given by: .. math:: H(C) = - \sum_{c=1}^{|C|} \frac{n_c}{n} \cdot \log\left(\frac{n_c}{n}\right) -with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k` -the number of samples respectively belonging to class :math:`c` and -cluster :math:`k`, and finally :math:`n_{c,k}` the number of samples -from class :math:`c` assigned to cluster :math:`k`. +with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k` the +number of samples respectively belonging to class :math:`c` and cluster +:math:`k`, and finally :math:`n_{c,k}` the number of samples from class +:math:`c` assigned to cluster :math:`k`. The **conditional entropy of clusters given class** :math:`H(K|C)` and the **entropy of clusters** :math:`H(K)` are defined in a symmetric manner. -Rosenberg and Hirschberg further define **V-measure** as the **harmonic -mean of homogeneity and completeness**: +Rosenberg and Hirschberg further define **V-measure** as the **harmonic mean of +homogeneity and completeness**: .. math:: v = 2 \cdot \frac{h \cdot c}{h + c} -.. topic:: References +|details-end| + +.. topic:: References: - * `V-Measure: A conditional entropy-based external cluster evaluation - measure `_ - Andrew Rosenberg and Julia Hirschberg, 2007 + * `V-Measure: A conditional entropy-based external cluster evaluation measure + `_ Andrew Rosenberg and Julia + Hirschberg, 2007 - .. [B2011] `Identication and Characterization of Events in Social Media + .. [B2011] `Identification and Characterization of Events in Social Media `_, Hila Becker, PhD Thesis. @@ -1673,7 +1882,7 @@ Where ``TP`` is the number of **True Positive** (i.e. the number of pair of points that belong to the same clusters in both the true labels and the predicted labels), ``FP`` is the number of **False Positive** (i.e. the number of pair of points that belong to the same clusters in the true labels and not -in the predicted labels) and ``FN`` is the number of **False Negative** (i.e the +in the predicted labels) and ``FN`` is the number of **False Negative** (i.e. the number of pair of points that belongs in the same clusters in the predicted labels and not in the true labels). @@ -1708,41 +1917,43 @@ Bad (e.g. independent labelings) have zero scores:: >>> metrics.fowlkes_mallows_score(labels_true, labels_pred) 0.0 -Advantages -~~~~~~~~~~ +.. topic:: Advantages: -- **Random (uniform) label assignments have a FMI score close to 0.0** - for any value of ``n_clusters`` and ``n_samples`` (which is not the - case for raw Mutual Information or the V-measure for instance). + - **Random (uniform) label assignments have a FMI score close to 0.0** for any + value of ``n_clusters`` and ``n_samples`` (which is not the case for raw + Mutual Information or the V-measure for instance). -- **Upper-bounded at 1**: Values close to zero indicate two label - assignments that are largely independent, while values close to one - indicate significant agreement. 
Further, values of exactly 0 indicate - **purely** independent label assignments and a FMI of exactly 1 indicates - that the two label assignments are equal (with or without permutation). + - **Upper-bounded at 1**: Values close to zero indicate two label assignments + that are largely independent, while values close to one indicate significant + agreement. Further, values of exactly 0 indicate **purely** independent + label assignments and a FMI of exactly 1 indicates that the two label + assignments are equal (with or without permutation). -- **No assumption is made on the cluster structure**: can be used - to compare clustering algorithms such as k-means which assumes isotropic - blob shapes with results of spectral clustering algorithms which can - find cluster with "folded" shapes. + - **No assumption is made on the cluster structure**: can be used to compare + clustering algorithms such as k-means which assumes isotropic blob shapes + with results of spectral clustering algorithms which can find cluster with + "folded" shapes. +.. topic:: Drawbacks: -Drawbacks -~~~~~~~~~ + - Contrary to inertia, **FMI-based measures require the knowledge of the + ground truth classes** while almost never available in practice or requires + manual assignment by human annotators (as in the supervised learning + setting). -- Contrary to inertia, **FMI-based measures require the knowledge - of the ground truth classes** while almost never available in practice or - requires manual assignment by human annotators (as in the supervised learning - setting). +|details-start| +**References** +|details-split| -.. topic:: References +* E. B. Fowkles and C. L. Mallows, 1983. "A method for comparing two + hierarchical clusterings". Journal of the American Statistical + Association. + https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008 - * E. B. Fowkles and C. L. Mallows, 1983. "A method for comparing two - hierarchical clusterings". Journal of the American Statistical Association. - https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008 +* `Wikipedia entry for the Fowlkes-Mallows Index + `_ - * `Wikipedia entry for the Fowlkes-Mallows Index - `_ +|details-end| .. _silhouette_coefficient: @@ -1786,35 +1997,38 @@ cluster analysis. >>> metrics.silhouette_score(X, labels, metric='euclidean') 0.55... -.. topic:: References - * Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the - Interpretation and Validation of Cluster Analysis"<10.1016/0377-0427(87)90125-7>` - . Computational and Applied Mathematics 20: 53–65. +.. topic:: Advantages: + - The score is bounded between -1 for incorrect clustering and +1 for highly + dense clustering. Scores around zero indicate overlapping clusters. -Advantages -~~~~~~~~~~ + - The score is higher when clusters are dense and well separated, which + relates to a standard concept of a cluster. -- The score is bounded between -1 for incorrect clustering and +1 for highly - dense clustering. Scores around zero indicate overlapping clusters. +.. topic:: Drawbacks: -- The score is higher when clusters are dense and well separated, which relates - to a standard concept of a cluster. + - The Silhouette Coefficient is generally higher for convex clusters than + other concepts of clusters, such as density based clusters like those + obtained through DBSCAN. +.. 
topic:: Examples: -Drawbacks -~~~~~~~~~ + * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In + this example the silhouette analysis is used to choose an optimal value for + n_clusters. -- The Silhouette Coefficient is generally higher for convex clusters than other - concepts of clusters, such as density based clusters like those obtained - through DBSCAN. -.. topic:: Examples: +|details-start| +**References** +|details-split| - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In this example - the silhouette analysis is used to choose an optimal value for n_clusters. +* Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the + Interpretation and Validation of Cluster + Analysis"<10.1016/0377-0427(87)90125-7>` . Computational and Applied + Mathematics 20: 53–65. +|details-end| .. _calinski_harabasz_index: @@ -1844,30 +2058,30 @@ cluster analysis: >>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X) >>> labels = kmeans_model.labels_ >>> metrics.calinski_harabasz_score(X, labels) - 561.62... + 561.59... -Advantages -~~~~~~~~~~ -- The score is higher when clusters are dense and well separated, which relates - to a standard concept of a cluster. +.. topic:: Advantages: -- The score is fast to compute. + - The score is higher when clusters are dense and well separated, which + relates to a standard concept of a cluster. + - The score is fast to compute. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- The Calinski-Harabasz index is generally higher for convex clusters than other - concepts of clusters, such as density based clusters like those obtained - through DBSCAN. + - The Calinski-Harabasz index is generally higher for convex clusters than + other concepts of clusters, such as density based clusters like those + obtained through DBSCAN. -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Mathematical formulation** +|details-split| For a set of data :math:`E` of size :math:`n_E` which has been clustered into :math:`k` clusters, the Calinski-Harabasz score :math:`s` is defined as the -ratio of the between-clusters dispersion mean and the within-cluster dispersion: +ratio of the between-clusters dispersion mean and the within-cluster +dispersion: .. math:: s = \frac{\mathrm{tr}(B_k)}{\mathrm{tr}(W_k)} \times \frac{n_E - k}{k - 1} @@ -1880,17 +2094,22 @@ matrix defined by: .. math:: B_k = \sum_{q=1}^k n_q (c_q - c_E) (c_q - c_E)^T -with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the center -of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and :math:`n_q` the -number of points in cluster :math:`q`. +with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the +center of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and +:math:`n_q` the number of points in cluster :math:`q`. + +|details-end| -.. topic:: References +|details-start| +**References** +|details-split| - * Caliński, T., & Harabasz, J. (1974). - `"A Dendrite Method for Cluster Analysis" - `_. - :doi:`Communications in Statistics-theory and Methods 3: 1-27 <10.1080/03610927408827101>`. +* Caliński, T., & Harabasz, J. (1974). `"A Dendrite Method for Cluster Analysis" + `_. + :doi:`Communications in Statistics-theory and Methods 3: 1-27 + <10.1080/03610927408827101>`. +|details-end| .. 
_davies-bouldin_index: @@ -1920,26 +2139,27 @@ cluster analysis as follows: >>> kmeans = KMeans(n_clusters=3, random_state=1).fit(X) >>> labels = kmeans.labels_ >>> davies_bouldin_score(X, labels) - 0.6619... + 0.666... -Advantages -~~~~~~~~~~ +.. topic:: Advantages: -- The computation of Davies-Bouldin is simpler than that of Silhouette scores. -- The index is solely based on quantities and features inherent to the dataset - as its computation only uses point-wise distances. + - The computation of Davies-Bouldin is simpler than that of Silhouette scores. + - The index is solely based on quantities and features inherent to the dataset + as its computation only uses point-wise distances. -Drawbacks -~~~~~~~~~ +.. topic:: Drawbacks: -- The Davies-Boulding index is generally higher for convex clusters than other - concepts of clusters, such as density based clusters like those obtained from - DBSCAN. -- The usage of centroid distance limits the distance metric to Euclidean space. + - The Davies-Boulding index is generally higher for convex clusters than other + concepts of clusters, such as density based clusters like those obtained + from DBSCAN. + - The usage of centroid distance limits the distance metric to Euclidean + space. -Mathematical formulation -~~~~~~~~~~~~~~~~~~~~~~~~ + +|details-start| +**Mathematical formulation** +|details-split| The index is defined as the average similarity between each cluster :math:`C_i` for :math:`i=1, ..., k` and its most similar one :math:`C_j`. In the context of @@ -1947,34 +2167,38 @@ this index, similarity is defined as a measure :math:`R_{ij}` that trades off: - :math:`s_i`, the average distance between each point of cluster :math:`i` and the centroid of that cluster -- also know as cluster diameter. -- :math:`d_{ij}`, the distance between cluster centroids :math:`i` and :math:`j`. +- :math:`d_{ij}`, the distance between cluster centroids :math:`i` and + :math:`j`. A simple choice to construct :math:`R_{ij}` so that it is nonnegative and symmetric is: .. math:: - R_{ij} = \frac{s_i + s_j}{d_{ij}} + R_{ij} = \frac{s_i + s_j}{d_{ij}} Then the Davies-Bouldin index is defined as: .. math:: - DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij} + DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij} +|details-end| -.. topic:: References +|details-start| +**References** +|details-split| - * Davies, David L.; Bouldin, Donald W. (1979). - :doi:`"A Cluster Separation Measure" <10.1109/TPAMI.1979.4766909>` - IEEE Transactions on Pattern Analysis and Machine Intelligence. - PAMI-1 (2): 224-227. +* Davies, David L.; Bouldin, Donald W. (1979). :doi:`"A Cluster Separation + Measure" <10.1109/TPAMI.1979.4766909>` IEEE Transactions on Pattern Analysis + and Machine Intelligence. PAMI-1 (2): 224-227. - * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001). - :doi:`"On Clustering Validation Techniques" <10.1023/A:1012801612483>` - Journal of Intelligent Information Systems, 17(2-3), 107-145. +* Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001). :doi:`"On + Clustering Validation Techniques" <10.1023/A:1012801612483>` Journal of + Intelligent Information Systems, 17(2-3), 107-145. - * `Wikipedia entry for Davies-Bouldin index - `_. +* `Wikipedia entry for Davies-Bouldin index + `_. +|details-end| .. _contingency_matrix: @@ -2007,30 +2231,32 @@ contingency matrix where the order of rows and columns correspond to a list of classes. -Advantages -~~~~~~~~~~ +.. 
topic:: Advantages: + + - Allows to examine the spread of each true cluster across predicted clusters + and vice versa. -- Allows to examine the spread of each true cluster across predicted - clusters and vice versa. + - The contingency table calculated is typically utilized in the calculation of + a similarity statistic (like the others listed in this document) between the + two clusterings. -- The contingency table calculated is typically utilized in the calculation - of a similarity statistic (like the others listed in this document) between - the two clusterings. +.. topic:: Drawbacks: -Drawbacks -~~~~~~~~~ + - Contingency matrix is easy to interpret for a small number of clusters, but + becomes very hard to interpret for a large number of clusters. -- Contingency matrix is easy to interpret for a small number of clusters, but - becomes very hard to interpret for a large number of clusters. + - It doesn't give a single metric to use as an objective for clustering + optimisation. -- It doesn't give a single metric to use as an objective for clustering - optimisation. +|details-start| +**References** +|details-split| -.. topic:: References +* `Wikipedia entry for contingency matrix + `_ - * `Wikipedia entry for contingency matrix - `_ +|details-end| .. _pair_confusion_matrix: @@ -2053,19 +2279,19 @@ under the true and predicted clusterings. It has the following entries: - :math:`C_{00}` : number of pairs with both clusterings having the samples - not clustered together +:math:`C_{00}` : number of pairs with both clusterings having the samples +not clustered together - :math:`C_{10}` : number of pairs with the true label clustering having the - samples clustered together but the other clustering not having the samples - clustered together +:math:`C_{10}` : number of pairs with the true label clustering having the +samples clustered together but the other clustering not having the samples +clustered together - :math:`C_{01}` : number of pairs with the true label clustering not having - the samples clustered together but the other clustering having the samples - clustered together +:math:`C_{01}` : number of pairs with the true label clustering not having +the samples clustered together but the other clustering having the samples +clustered together - :math:`C_{11}` : number of pairs with both clusterings having the samples - clustered together +:math:`C_{11}` : number of pairs with both clusterings having the samples +clustered together Considering a pair of samples that is clustered together a positive pair, then as in binary classification the count of true negatives is @@ -2108,7 +2334,11 @@ diagonal entries:: array([[ 0, 0], [12, 0]]) -.. topic:: References +|details-start| +**References** +|details-split| + + * :doi:`"Comparing Partitions" <10.1007/BF01908075>` L. Hubert and P. Arabie, + Journal of Classification 1985 - * :doi:`"Comparing Partitions" <10.1007/BF01908075>` - L. Hubert and P. Arabie, Journal of Classification 1985 +|details-end| diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 4a61b5ec5f118..28931cf52f283 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -5,14 +5,24 @@ Pipelines and composite estimators ================================== -Transformers are usually combined with classifiers, regressors or other -estimators to build a composite estimator. The most common tool is a -:ref:`Pipeline `. 
Pipeline is often used in combination with -:ref:`FeatureUnion ` which concatenates the output of -transformers into a composite feature space. :ref:`TransformedTargetRegressor -` deals with transforming the :term:`target` -(i.e. log-transform :term:`y`). In contrast, Pipelines only transform the -observed data (:term:`X`). +To build a composite estimator, transformers are usually combined with other +transformers or with :term:`predictors` (such as classifiers or regressors). +The most common tool used for composing estimators is a :ref:`Pipeline +`. Pipelines require all steps except the last to be a +:term:`transformer`. The last step can be anything, a transformer, a +:term:`predictor`, or a clustering estimator which might have or not have a +`.predict(...)` method. A pipeline exposes all methods provided by the last +estimator: if the last step provides a `transform` method, then the pipeline +would have a `transform` method and behave like a transformer. If the last step +provides a `predict` method, then the pipeline would expose that method, and +given a data :term:`X`, use all steps except the last to transform the data, +and then give that transformed data to the `predict` method of the last step of +the pipeline. The class :class:`Pipeline` is often used in combination with +:ref:`ColumnTransformer ` or +:ref:`FeatureUnion ` which concatenate the output of transformers +into a composite feature space. +:ref:`TransformedTargetRegressor ` +deals with transforming the :term:`target` (i.e. log-transform :term:`y`). .. _pipeline: @@ -41,12 +51,21 @@ All estimators in a pipeline, except the last one, must be transformers (i.e. must have a :term:`transform` method). The last estimator may be any type (transformer, classifier, etc.). +.. note:: + + Calling ``fit`` on the pipeline is the same as calling ``fit`` on + each estimator in turn, ``transform`` the input and pass it on to the next step. + The pipeline has all the methods that the last estimator in the pipeline has, + i.e. if the last estimator is a classifier, the :class:`Pipeline` can be used + as a classifier. If the last estimator is a transformer, again, so is the + pipeline. + Usage ----- -Construction -............ +Build a pipeline +................ The :class:`Pipeline` is built using a list of ``(key, value)`` pairs, where the ``key`` is a string containing the name you want to give this step and ``value`` @@ -60,23 +79,41 @@ is an estimator object:: >>> pipe Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) +|details-start| +**Shorthand version using :func:`make_pipeline`** +|details-split| + The utility function :func:`make_pipeline` is a shorthand for constructing pipelines; it takes a variable number of estimators and returns a pipeline, filling in the names automatically:: >>> from sklearn.pipeline import make_pipeline - >>> from sklearn.naive_bayes import MultinomialNB - >>> from sklearn.preprocessing import Binarizer - >>> make_pipeline(Binarizer(), MultinomialNB()) - Pipeline(steps=[('binarizer', Binarizer()), ('multinomialnb', MultinomialNB())]) + >>> make_pipeline(PCA(), SVC()) + Pipeline(steps=[('pca', PCA()), ('svc', SVC())]) + +|details-end| -Accessing steps -............... +Access pipeline steps +..................... -The estimators of a pipeline are stored as a list in the ``steps`` attribute, -but can be accessed by index or name by indexing (with ``[idx]``) the -Pipeline:: +The estimators of a pipeline are stored as a list in the ``steps`` attribute. 
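For the ``pipe`` object built above, this attribute is simply a list of
``(name, estimator)`` tuples (shown here as an illustrative sketch)::

    >>> pipe.steps
    [('reduce_dim', PCA()), ('clf', SVC())]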
+A sub-pipeline can be extracted using the slicing notation commonly used +for Python Sequences such as lists or strings (although only a step of 1 is +permitted). This is convenient for performing only some of the transformations +(or their inverse): + + >>> pipe[:1] + Pipeline(steps=[('reduce_dim', PCA())]) + >>> pipe[-1:] + Pipeline(steps=[('clf', SVC())]) + +|details-start| +**Accessing a step by name or position** +|details-split| + +A specific step can also be accessed by index or name by indexing (with ``[idx]``) the +pipeline:: >>> pipe.steps[0] ('reduce_dim', PCA()) @@ -85,34 +122,63 @@ Pipeline:: >>> pipe['reduce_dim'] PCA() -Pipeline's `named_steps` attribute allows accessing steps by name with tab +`Pipeline`'s `named_steps` attribute allows accessing steps by name with tab completion in interactive environments:: >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] True -A sub-pipeline can also be extracted using the slicing notation commonly used -for Python Sequences such as lists or strings (although only a step of 1 is -permitted). This is convenient for performing only some of the transformations -(or their inverse): +|details-end| - >>> pipe[:1] - Pipeline(steps=[('reduce_dim', PCA())]) - >>> pipe[-1:] - Pipeline(steps=[('clf', SVC())]) +Tracking feature names in a pipeline +.................................... + +To enable model inspection, :class:`~sklearn.pipeline.Pipeline` has a +``get_feature_names_out()`` method, just like all transformers. You can use +pipeline slicing to get the feature names going into each step:: + + >>> from sklearn.datasets import load_iris + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.feature_selection import SelectKBest + >>> iris = load_iris() + >>> pipe = Pipeline(steps=[ + ... ('select', SelectKBest(k=2)), + ... ('clf', LogisticRegression())]) + >>> pipe.fit(iris.data, iris.target) + Pipeline(steps=[('select', SelectKBest(...)), ('clf', LogisticRegression(...))]) + >>> pipe[:-1].get_feature_names_out() + array(['x2', 'x3'], ...) + +|details-start| +**Customize feature names** +|details-split| + +You can also provide custom feature names for the input data using +``get_feature_names_out``:: + + >>> pipe[:-1].get_feature_names_out(iris.feature_names) + array(['petal length (cm)', 'petal width (cm)'], ...) +|details-end| .. _pipeline_nested_parameters: -Nested parameters -................. +Access to nested parameters +........................... -Parameters of the estimators in the pipeline can be accessed using the -``__`` syntax:: +It is common to adjust the parameters of an estimator within a pipeline. This parameter +is therefore nested because it belongs to a particular sub-step. Parameters of the +estimators in the pipeline are accessible using the ``__`` +syntax:: + >>> pipe = Pipeline(steps=[("reduce_dim", PCA()), ("clf", SVC())]) >>> pipe.set_params(clf__C=10) Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))]) +|details-start| +**When does it matter?** +|details-split| + This is particularly important for doing grid searches:: >>> from sklearn.model_selection import GridSearchCV @@ -123,42 +189,16 @@ This is particularly important for doing grid searches:: Individual steps may also be replaced as parameters, and non-final steps may be ignored by setting them to ``'passthrough'``:: - >>> from sklearn.linear_model import LogisticRegression >>> param_grid = dict(reduce_dim=['passthrough', PCA(5), PCA(10)], ... clf=[SVC(), LogisticRegression()], ... 
clf__C=[0.1, 10, 100]) >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) -The estimators of the pipeline can be retrieved by index: - - >>> pipe[0] - PCA() - -or by name:: - - >>> pipe['reduce_dim'] - PCA() - -To enable model inspection, :class:`~sklearn.pipeline.Pipeline` has a -``get_feature_names_out()`` method, just like all transformers. You can use -pipeline slicing to get the feature names going into each step:: - - >>> from sklearn.datasets import load_iris - >>> from sklearn.feature_selection import SelectKBest - >>> iris = load_iris() - >>> pipe = Pipeline(steps=[ - ... ('select', SelectKBest(k=2)), - ... ('clf', LogisticRegression())]) - >>> pipe.fit(iris.data, iris.target) - Pipeline(steps=[('select', SelectKBest(...)), ('clf', LogisticRegression(...))]) - >>> pipe[:-1].get_feature_names_out() - array(['x2', 'x3'], ...) +.. topic:: See Also: -You can also provide custom feature names for the input data using -``get_feature_names_out``:: + * :ref:`composite_grid_search` - >>> pipe[:-1].get_feature_names_out(iris.feature_names) - array(['petal length (cm)', 'petal width (cm)'], ...) +|details-end| .. topic:: Examples: @@ -170,20 +210,6 @@ You can also provide custom feature names for the input data using * :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` * :ref:`sphx_glr_auto_examples_miscellaneous_plot_pipeline_display.py` -.. topic:: See Also: - - * :ref:`composite_grid_search` - - -Notes ------ - -Calling ``fit`` on the pipeline is the same as calling ``fit`` on -each estimator in turn, ``transform`` the input and pass it on to the next step. -The pipeline has all the methods that the last estimator in the pipeline has, -i.e. if the last estimator is a classifier, the :class:`Pipeline` can be used -as a classifier. If the last estimator is a transformer, again, so is the -pipeline. .. _pipeline_cache: @@ -198,7 +224,7 @@ after calling ``fit``. This feature is used to avoid computing the fit transformers within a pipeline if the parameters and input data are identical. A typical example is the case of a grid search in which the transformers can be fitted only once and reused for -each configuration. +each configuration. The last step will never be cached, even if it is a transformer. The parameter ``memory`` is needed in order to cache the transformers. ``memory`` can be either a string containing the directory where to cache the @@ -219,43 +245,49 @@ object:: >>> # Clear the cache directory when you don't need it anymore >>> rmtree(cachedir) -.. warning:: **Side effect of caching transformers** - - Using a :class:`Pipeline` without cache enabled, it is possible to - inspect the original instance such as:: - - >>> from sklearn.datasets import load_digits - >>> X_digits, y_digits = load_digits(return_X_y=True) - >>> pca1 = PCA() - >>> svm1 = SVC() - >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) - >>> pipe.fit(X_digits, y_digits) - Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) - >>> # The pca instance can be inspected directly - >>> print(pca1.components_) - [[-1.77484909e-19 ... 4.07058917e-18]] - - Enabling caching triggers a clone of the transformers before fitting. - Therefore, the transformer instance given to the pipeline cannot be - inspected directly. - In following example, accessing the :class:`PCA` instance ``pca2`` - will raise an ``AttributeError`` since ``pca2`` will be an unfitted - transformer. 
- Instead, use the attribute ``named_steps`` to inspect estimators within - the pipeline:: - - >>> cachedir = mkdtemp() - >>> pca2 = PCA() - >>> svm2 = SVC() - >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], - ... memory=cachedir) - >>> cached_pipe.fit(X_digits, y_digits) - Pipeline(memory=..., - steps=[('reduce_dim', PCA()), ('clf', SVC())]) - >>> print(cached_pipe.named_steps['reduce_dim'].components_) - [[-1.77484909e-19 ... 4.07058917e-18]] - >>> # Remove the cache directory - >>> rmtree(cachedir) +|details-start| +**Warning: Side effect of caching transformers** +|details-split| + +Using a :class:`Pipeline` without cache enabled, it is possible to +inspect the original instance such as:: + + >>> from sklearn.datasets import load_digits + >>> X_digits, y_digits = load_digits(return_X_y=True) + >>> pca1 = PCA(n_components=10) + >>> svm1 = SVC() + >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) + >>> pipe.fit(X_digits, y_digits) + Pipeline(steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) + >>> # The pca instance can be inspected directly + >>> pca1.components_.shape + (10, 64) + + +Enabling caching triggers a clone of the transformers before fitting. +Therefore, the transformer instance given to the pipeline cannot be +inspected directly. +In following example, accessing the :class:`~sklearn.decomposition.PCA` +instance ``pca2`` will raise an ``AttributeError`` since ``pca2`` will be an +unfitted transformer. +Instead, use the attribute ``named_steps`` to inspect estimators within +the pipeline:: + + >>> cachedir = mkdtemp() + >>> pca2 = PCA(n_components=10) + >>> svm2 = SVC() + >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], + ... memory=cachedir) + >>> cached_pipe.fit(X_digits, y_digits) + Pipeline(memory=..., + steps=[('reduce_dim', PCA(n_components=10)), ('clf', SVC())]) + >>> cached_pipe.named_steps['reduce_dim'].components_.shape + (10, 64) + >>> # Remove the cache directory + >>> rmtree(cachedir) + + +|details-end| .. topic:: Examples: diff --git a/doc/modules/covariance.rst b/doc/modules/covariance.rst index c97676ea62108..50927f9a677f6 100644 --- a/doc/modules/covariance.rst +++ b/doc/modules/covariance.rst @@ -160,8 +160,10 @@ object to the same sample. .. topic:: References: - .. [2] Chen et al., "Shrinkage Algorithms for MMSE Covariance Estimation", - IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010. + .. [2] :arxiv:`"Shrinkage algorithms for MMSE covariance estimation.", + Chen, Y., Wiesel, A., Eldar, Y. C., & Hero, A. O. + IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010. + <0907.4698>` .. topic:: Examples: diff --git a/doc/modules/cross_decomposition.rst b/doc/modules/cross_decomposition.rst index caaec18c6c6d2..8f8d217f87144 100644 --- a/doc/modules/cross_decomposition.rst +++ b/doc/modules/cross_decomposition.rst @@ -28,7 +28,7 @@ PLS draws similarities with `Principal Component Regression `_ (PCR), where the samples are first projected into a lower-dimensional subspace, and the targets `y` are predicted using `transformed(X)`. One issue with PCR is that -the dimensionality reduction is unsupervized, and may lose some important +the dimensionality reduction is unsupervised, and may lose some important variables: PCR would keep the features with the most variance, but it's possible that features with a small variances are relevant from predicting the target. 
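As a rough sketch of this contrast (the data and settings below are made up
for illustration), PCR can be assembled as a pipeline of
:class:`~sklearn.decomposition.PCA` followed by a linear regressor and
compared with :class:`~sklearn.cross_decomposition.PLSRegression` on data
where the predictive feature has low variance::

    >>> import numpy as np
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.decomposition import PCA
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.cross_decomposition import PLSRegression
    >>> rng = np.random.RandomState(0)
    >>> X = rng.normal(size=(200, 2)) * [10, 1]  # second feature has much smaller variance
    >>> y = X[:, 1] + 0.1 * rng.normal(size=200)  # but it is the one driving the target
    >>> pcr = make_pipeline(PCA(n_components=1), LinearRegression()).fit(X, y)
    >>> pls = PLSRegression(n_components=1).fit(X, y)
    >>> pcr.score(X, y) < pls.score(X, y)
    True

With a single retained component, PCR keeps the high-variance but
uninformative direction, while PLS keeps the direction that covaries with the
target.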
In a way, PLS allows for the same kind of dimensionality @@ -92,9 +92,9 @@ Step *a)* may be performed in two ways: either by computing the whole SVD of values, or by directly computing the singular vectors using the power method (cf section 11.3 in [1]_), which corresponds to the `'nipals'` option of the `algorithm` parameter. - -Transforming data -^^^^^^^^^^^^^^^^^ +|details-start| +**Transforming data** +|details-split| To transform :math:`X` into :math:`\bar{X}`, we need to find a projection matrix :math:`P` such that :math:`\bar{X} = XP`. We know that for the @@ -106,9 +106,11 @@ training data, :math:`\Xi = XP`, and :math:`X = \Xi \Gamma^T`. Setting Similarly, :math:`Y` can be transformed using the rotation matrix :math:`V(\Delta^T V)^{-1}`, accessed via the `y_rotations_` attribute. +|details-end| -Predicting the targets Y -^^^^^^^^^^^^^^^^^^^^^^^^ +|details-start| +**Predicting the targets Y** +|details-split| To predict the targets of some data :math:`X`, we are looking for a coefficient matrix :math:`\beta \in R^{d \times t}` such that :math:`Y = @@ -125,6 +127,8 @@ P \Delta^T`, and as a result the coefficient matrix :math:`\beta = \alpha P :math:`\beta` can be accessed through the `coef_` attribute. +|details-end| + PLSSVD ------ @@ -180,14 +184,17 @@ Since :class:`CCA` involves the inversion of :math:`X_k^TX_k` and :math:`Y_k^TY_k`, this estimator can be unstable if the number of features or targets is greater than the number of samples. - -.. topic:: Reference: +|details-start| +**Reference** +|details-split| .. [1] `A survey of Partial Least Squares (PLS) methods, with emphasis on the two-block case `_ JA Wegelin +|details-end| + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py` diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 72bad0bf8ef87..34f14fe6846a2 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -86,10 +86,10 @@ the training set is split into *k* smaller sets but generally follow the same principles). The following procedure is followed for each of the *k* "folds": - * A model is trained using :math:`k-1` of the folds as training data; - * the resulting model is validated on the remaining part of the data - (i.e., it is used as a test set to compute a performance measure - such as accuracy). +* A model is trained using :math:`k-1` of the folds as training data; +* the resulting model is validated on the remaining part of the data + (i.e., it is used as a test set to compute a performance measure + such as accuracy). The performance measure reported by *k*-fold cross-validation is then the average of the values computed in the loop. @@ -102,6 +102,7 @@ where the number of samples is very small. .. image:: ../images/grid_search_cross_validation.png :width: 500px :height: 300px + :alt: A depiction of a 5 fold cross validation on a training set, while holding out a test set. :align: center Computing cross-validated metrics @@ -169,7 +170,9 @@ indices, for example:: >>> cross_val_score(clf, X, y, cv=custom_cv) array([1. , 0.973...]) -.. topic:: Data transformation with held out data +|details-start| +**Data transformation with held out data** +|details-split| Just as it is important to test a predictor on data held-out from training, preprocessing (such as standardization, feature selection, etc.) @@ -196,6 +199,7 @@ indices, for example:: See :ref:`combining_estimators`. +|details-end| .. 
_multimetric_cross_validation: @@ -208,8 +212,8 @@ two ways: - It allows specifying multiple metrics for evaluation. - It returns a dict containing fit-times, score-times - (and optionally training scores as well as fitted estimators) in - addition to the test score. + (and optionally training scores, fitted estimators, train-test split indices) + in addition to the test score. For single metric evaluation, where the scoring parameter is a string, callable or None, the keys will be - ``['test_score', 'fit_time', 'score_time']`` @@ -220,10 +224,10 @@ following keys - ``return_train_score`` is set to ``False`` by default to save computation time. To evaluate the scores on the training set as well you need to set it to -``True``. - -You may also retain the estimator fitted on each training set by setting -``return_estimator=True``. +``True``. You may also retain the estimator fitted on each training set by +setting ``return_estimator=True``. Similarly, you may set +`return_indices=True` to retain the training and testing indices used to split +the dataset into train and test sets for each cv split. The multiple metrics can be specified either as a list, tuple or set of predefined scorer names:: @@ -438,20 +442,23 @@ then 5- or 10- fold cross validation can overestimate the generalization error. As a general rule, most authors, and empirical evidence, suggest that 5- or 10- fold cross validation should be preferred to LOO. - -.. topic:: References: +|details-start| +**References** +|details-split| * ``_; * T. Hastie, R. Tibshirani, J. Friedman, `The Elements of Statistical Learning `_, Springer 2009 * L. Breiman, P. Spector `Submodel selection and evaluation in regression: The X-random case - `_, International Statistical Review 1992; + `_, International Statistical Review 1992; * R. Kohavi, `A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model Selection `_, Intl. Jnt. Conf. AI * R. Bharat Rao, G. Fung, R. Rosales, `On the Dangers of Cross-Validation. An Experimental Evaluation `_, SIAM 2008; * G. James, D. Witten, T. Hastie, R Tibshirani, `An Introduction to - Statistical Learning `_, Springer 2013. + Statistical Learning `_, Springer 2013. + +|details-end| .. _leave_p_out: @@ -520,8 +527,8 @@ the proportion of samples on each side of the train / test split. .. _stratification: -Cross-validation iterators with stratification based on class labels. ---------------------------------------------------------------------- +Cross-validation iterators with stratification based on class labels +-------------------------------------------------------------------- Some classification problems can exhibit a large imbalance in the distribution of the target classes: for instance there could be several times more negative @@ -590,6 +597,19 @@ Here is a visualization of the cross-validation behavior. :align: center :scale: 75% +.. _predefined_split: + +Predefined fold-splits / Validation-sets +---------------------------------------- + +For some datasets, a pre-defined split of the data into training- and +validation fold or into several cross-validation folds already +exists. Using :class:`PredefinedSplit` it is possible to use these folds +e.g. when searching for hyperparameters. + +For example, when using a validation set, set the ``test_fold`` to 0 for all +samples that are part of the validation set, and to -1 for all other samples. + .. 
_group_cv: Cross-validation iterators for grouped data @@ -680,7 +700,9 @@ Example:: [ 0 1 4 5 6 7 8 9 11 12 13 14] [ 2 3 10 15 16 17] [ 1 2 3 8 9 10 12 13 14 15 16 17] [ 0 4 5 6 7 11] -Implementation notes: +|details-start| +**Implementation notes** +|details-split| - With the current implementation full shuffle is not possible in most scenarios. When shuffle=True, the following happens: @@ -701,6 +723,8 @@ Implementation notes: even if perfect stratification is possible. If you have relatively close distribution of classes in each group, using :class:`GroupKFold` is better. +|details-end| + Here is a visualization of cross-validation behavior for uneven groups: .. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_cv_indices_005.png @@ -807,19 +831,6 @@ expensive. In such a scenario, :class:`GroupShuffleSplit` provides a random sample (with replacement) of the train / test splits generated by :class:`LeavePGroupsOut`. -.. _predefined_split: - -Predefined Fold-Splits / Validation-Sets ----------------------------------------- - -For some datasets, a pre-defined split of the data into training- and -validation fold or into several cross-validation folds already -exists. Using :class:`PredefinedSplit` it is possible to use these folds -e.g. when searching for hyperparameters. - -For example, when using a validation set, set the ``test_fold`` to 0 for all -samples that are part of the validation set, and to -1 for all other samples. - Using cross-validation iterators to split train and test -------------------------------------------------------- @@ -992,8 +1003,12 @@ individual model is very fast. * :ref:`sphx_glr_auto_examples_model_selection_plot_permutation_tests_for_classification.py` -.. topic:: References: +|details-start| +**References** +|details-split| * Ojala and Garriga. `Permutation Tests for Studying Classifier Performance `_. J. Mach. Learn. Res. 2010. + +|details-end| diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index 293f31dacd091..e34818a322c7d 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -53,6 +53,7 @@ data based on the amount of variance it explains. As such it implements a .. topic:: Examples: + * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py` * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py` * :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py` @@ -71,11 +72,11 @@ exactly match the results of :class:`PCA` while processing the data in a minibatch fashion. :class:`IncrementalPCA` makes it possible to implement out-of-core Principal Component Analysis either by: - * Using its ``partial_fit`` method on chunks of data fetched sequentially - from the local hard drive or a network database. +* Using its ``partial_fit`` method on chunks of data fetched sequentially + from the local hard drive or a network database. - * Calling its fit method on a sparse matrix or a memory mapped file using - ``numpy.memmap``. +* Calling its fit method on a memory mapped file using + ``numpy.memmap``. :class:`IncrementalPCA` only stores estimates of component and noise variances, in order update ``explained_variance_ratio_`` incrementally. This is why @@ -290,6 +291,8 @@ prediction (kernel dependency estimation). :class:`KernelPCA` supports both .. topic:: Examples: * :ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py` + * :ref:`sphx_glr_auto_examples_applications_plot_digits_denoising.py` + .. 
topic:: References: @@ -319,6 +322,11 @@ is eigendecomposed in the Kernel PCA fitting process has an effective rank that is much smaller than its size. This is a situation where approximate eigensolvers can provide speedup with very low precision loss. + +|details-start| +**Eigensolvers** +|details-split| + The optional parameter ``eigen_solver='randomized'`` can be used to *significantly* reduce the computation time when the number of requested ``n_components`` is small compared with the number of samples. It relies on @@ -343,6 +351,7 @@ is extremely small. It is enabled by default when the desired number of components is less than 10 (strict) and the number of samples is more than 200 (strict). See :class:`KernelPCA` for details. + .. topic:: References: * *dense* solver: @@ -351,20 +360,22 @@ components is less than 10 (strict) and the number of samples is more than 200 * *randomized* solver: - * Algorithm 4.3 in - :arxiv:`"Finding structure with randomness: Stochastic - algorithms for constructing approximate matrix decompositions" <0909.4061>` - Halko, et al. (2009) + * Algorithm 4.3 in + :arxiv:`"Finding structure with randomness: Stochastic + algorithms for constructing approximate matrix decompositions" <0909.4061>` + Halko, et al. (2009) - * :arxiv:`"An implementation of a randomized algorithm - for principal component analysis" <1412.3510>` - A. Szlam et al. (2014) + * :arxiv:`"An implementation of a randomized algorithm + for principal component analysis" <1412.3510>` + A. Szlam et al. (2014) * *arpack* solver: `scipy.sparse.linalg.eigsh documentation `_ R. B. Lehoucq, D. C. Sorensen, and C. Yang, (1998) +|details-end| + .. _LSA: @@ -375,6 +386,16 @@ Truncated singular value decomposition and latent semantic analysis (SVD) that only computes the :math:`k` largest singular values, where :math:`k` is a user-specified parameter. +:class:`TruncatedSVD` is very similar to :class:`PCA`, but differs +in that the matrix :math:`X` does not need to be centered. +When the columnwise (per-feature) means of :math:`X` +are subtracted from the feature values, +truncated SVD on the resulting matrix is equivalent to PCA. + +|details-start| +**About truncated SVD and latent semantic analysis (LSA)** +|details-split| + When truncated SVD is applied to term-document matrices (as returned by :class:`~sklearn.feature_extraction.text.CountVectorizer` or :class:`~sklearn.feature_extraction.text.TfidfVectorizer`), @@ -415,15 +436,6 @@ To also transform a test set :math:`X`, we multiply it with :math:`V_k`: We present LSA in a different way that matches the scikit-learn API better, but the singular values found are the same. -:class:`TruncatedSVD` is very similar to :class:`PCA`, but differs -in that the matrix :math:`X` does not need to be centered. -When the columnwise (per-feature) means of :math:`X` -are subtracted from the feature values, -truncated SVD on the resulting matrix is equivalent to PCA. -In practical terms, this means -that the :class:`TruncatedSVD` transformer accepts ``scipy.sparse`` -matrices without the need to densify them, -as densifying may fill up memory even for medium-sized document collections. While the :class:`TruncatedSVD` transformer works with any feature matrix, @@ -434,6 +446,8 @@ should be turned on (``sublinear_tf=True, use_idf=True``) to bring the feature values closer to a Gaussian distribution, compensating for LSA's erroneous assumptions about textual data. +|details-end| + .. 
topic:: Examples: * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` @@ -446,6 +460,7 @@ compensating for LSA's erroneous assumptions about textual data. `_ + .. _DictionaryLearning: Dictionary Learning @@ -623,7 +638,7 @@ does not fit into the memory. computationally efficient and implements on-line learning with a ``partial_fit`` method. - Example: :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` + Example: :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` .. currentmodule:: sklearn.decomposition @@ -808,7 +823,7 @@ faces dataset, in comparison with the PCA eigenfaces. .. centered:: |pca_img5| |nmf_img5| -The :attr:`init` attribute determines the initialization method applied, which +The `init` attribute determines the initialization method applied, which has a great impact on the performance of the method. :class:`NMF` implements the method Nonnegative Double Singular Value Decomposition. NNDSVD [4]_ is based on two SVD processes, one approximating the data matrix, the other approximating @@ -825,20 +840,20 @@ basic NNDSVD algorithm which introduces a lot of zeros; in this case, NNDSVDa or NNDSVDar should be preferred. :class:`NMF` can also be initialized with correctly scaled random non-negative -matrices by setting :attr:`init="random"`. An integer seed or a -``RandomState`` can also be passed to :attr:`random_state` to control +matrices by setting `init="random"`. An integer seed or a +``RandomState`` can also be passed to `random_state` to control reproducibility. -In :class:`NMF`, L1 and L2 priors can be added to the loss function in order -to regularize the model. The L2 prior uses the Frobenius norm, while the L1 -prior uses an elementwise L1 norm. As in :class:`ElasticNet`, we control the -combination of L1 and L2 with the :attr:`l1_ratio` (:math:`\rho`) parameter, -and the intensity of the regularization with the :attr:`alpha_W` and :attr:`alpha_H` -(:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are scaled by the number -of samples (:math:`n\_samples`) for `H` and the number of features (:math:`n\_features`) -for `W` to keep their impact balanced with respect to one another and to the data fit -term as independent as possible of the size of the training set. Then the priors terms -are: +In :class:`NMF`, L1 and L2 priors can be added to the loss function in order to +regularize the model. The L2 prior uses the Frobenius norm, while the L1 prior +uses an elementwise L1 norm. As in :class:`~sklearn.linear_model.ElasticNet`, +we control the combination of L1 and L2 with the `l1_ratio` (:math:`\rho`) +parameter, and the intensity of the regularization with the `alpha_W` and +`alpha_H` (:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are +scaled by the number of samples (:math:`n\_samples`) for `H` and the number of +features (:math:`n\_features`) for `W` to keep their impact balanced with +respect to one another and to the data fit term as independent as possible of +the size of the training set. Then the priors terms are: .. math:: (\alpha_W \rho ||W||_1 + \frac{\alpha_W(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2) * n\_features @@ -879,8 +894,7 @@ defined by : .. math:: d_{\beta}(X, Y) = \sum_{i,j} \frac{1}{\beta(\beta - 1)}(X_{ij}^\beta + (\beta-1)Y_{ij}^\beta - \beta X_{ij} Y_{ij}^{\beta - 1}) -.. figure:: ../auto_examples/decomposition/images/sphx_glr_plot_beta_divergence_001.png - :target: ../auto_examples/decomposition/plot_beta_divergence.html +.. 
image:: ../images/beta_divergence.png :align: center :scale: 75% @@ -888,6 +902,10 @@ Note that this definition is not valid if :math:`\beta \in (0; 1)`, yet it can be continuously extended to the definitions of :math:`d_{KL}` and :math:`d_{IS}` respectively. +|details-start| +**NMF implemented solvers** +|details-split| + :class:`NMF` implements two solvers, using Coordinate Descent ('cd') [5]_, and Multiplicative Update ('mu') [6]_. The 'mu' solver can optimize every beta-divergence, including of course the Frobenius norm (:math:`\beta=2`), the @@ -901,6 +919,8 @@ The 'cd' solver can only optimize the Frobenius norm. Due to the underlying non-convexity of NMF, the different solvers may converge to different minima, even when optimizing the same distance function. +|details-end| + NMF is best used with the ``fit_transform`` method, which returns the matrix W. The matrix H is stored into the fitted model in the ``components_`` attribute; the method ``transform`` will decompose a new matrix X_new based on these @@ -915,11 +935,12 @@ stored components:: >>> X_new = np.array([[1, 0], [1, 6.1], [1, 0], [1, 4], [3.2, 1], [0, 4]]) >>> W_new = model.transform(X_new) + + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py` * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` - * :ref:`sphx_glr_auto_examples_decomposition_plot_beta_divergence.py` .. _MiniBatchNMF: @@ -951,7 +972,7 @@ is not readily available from the start, or when the data does not fit into memo D. Lee, S. Seung, 1999 .. [2] `"Non-negative Matrix Factorization with Sparseness Constraints" - `_ + `_ P. Hoyer, 2004 .. [4] `"SVD based initialization: A head start for nonnegative @@ -989,10 +1010,10 @@ The graphical model of LDA is a three-level generative model: Note on notations presented in the graphical model above, which can be found in Hoffman et al. (2013): - * The corpus is a collection of :math:`D` documents. - * A document is a sequence of :math:`N` words. - * There are :math:`K` topics in the corpus. - * The boxes represent repeated sampling. +* The corpus is a collection of :math:`D` documents. +* A document is a sequence of :math:`N` words. +* There are :math:`K` topics in the corpus. +* The boxes represent repeated sampling. In the graphical model, each node is a random variable and has a role in the generative process. A shaded node indicates an observed variable and an unshaded @@ -1002,25 +1023,29 @@ of topics in the corpus and the distribution of words in the documents. The goal of LDA is to use the observed words to infer the hidden topic structure. +|details-start| +**Details on modeling text corpora** +|details-split| + When modeling text corpora, the model assumes the following generative process for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K` -corresponding to :attr:`n_components` in the API: +corresponding to `n_components` in the API: - 1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim - \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words, - i.e. the probability of a word appearing in topic :math:`k`. - :math:`\eta` corresponds to :attr:`topic_word_prior`. +1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim + \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words, + i.e. the probability of a word appearing in topic :math:`k`. + :math:`\eta` corresponds to `topic_word_prior`. - 2. 
For each document :math:`d \in D`, draw the topic proportions - :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha` - corresponds to :attr:`doc_topic_prior`. +2. For each document :math:`d \in D`, draw the topic proportions + :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha` + corresponds to `doc_topic_prior`. - 3. For each word :math:`i` in document :math:`d`: +3. For each word :math:`i` in document :math:`d`: - a. Draw the topic assignment :math:`z_{di} \sim \mathrm{Multinomial} - (\theta_d)` - b. Draw the observed word :math:`w_{ij} \sim \mathrm{Multinomial} - (\beta_{z_{di}})` + a. Draw the topic assignment :math:`z_{di} \sim \mathrm{Multinomial} + (\theta_d)` + b. Draw the observed word :math:`w_{ij} \sim \mathrm{Multinomial} + (\beta_{z_{di}})` For parameter estimation, the posterior distribution is: @@ -1042,6 +1067,8 @@ Maximizing ELBO is equivalent to minimizing the Kullback-Leibler(KL) divergence between :math:`q(z,\theta,\beta)` and the true posterior :math:`p(z, \theta, \beta |w, \alpha, \eta)`. +|details-end| + :class:`LatentDirichletAllocation` implements the online variational Bayes algorithm and supports both online and batch update methods. While the batch method updates variational variables after each full pass through @@ -1056,7 +1083,7 @@ points. When :class:`LatentDirichletAllocation` is applied on a "document-term" matrix, the matrix will be decomposed into a "topic-term" matrix and a "document-topic" matrix. While -"topic-term" matrix is stored as :attr:`components_` in the model, "document-topic" matrix +"topic-term" matrix is stored as `components_` in the model, "document-topic" matrix can be calculated from ``transform`` method. :class:`LatentDirichletAllocation` also implements ``partial_fit`` method. This is used @@ -1069,7 +1096,7 @@ when data can be fetched sequentially. .. topic:: References: * `"Latent Dirichlet Allocation" - `_ + `_ D. Blei, A. Ng, M. Jordan, 2003 * `"Online Learning for Latent Dirichlet Allocation” diff --git a/doc/modules/density.rst b/doc/modules/density.rst index fc0530ed262c0..5a9b456010aa3 100644 --- a/doc/modules/density.rst +++ b/doc/modules/density.rst @@ -113,6 +113,10 @@ forms, which are shown in the following figure: .. centered:: |kde_kernels| +|details-start| +**kernels' mathematical expressions** +|details-split| + The form of these kernels is as follows: * Gaussian kernel (``kernel = 'gaussian'``) @@ -139,6 +143,8 @@ The form of these kernels is as follows: :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h` +|details-end| + The kernel density estimator can be used with any of the valid distance metrics (see :class:`~sklearn.metrics.DistanceMetric` for a list of available metrics), though the results are properly normalized only diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 0c4159165e181..9120bd855fd01 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1,502 +1,509 @@ .. _ensemble: -================ -Ensemble methods -================ +=========================================================================== +Ensembles: Gradient boosting, random forests, bagging, voting, stacking +=========================================================================== .. 
currentmodule:: sklearn.ensemble -The goal of **ensemble methods** is to combine the predictions of several +**Ensemble methods** combine the predictions of several base estimators built with a given learning algorithm in order to improve generalizability / robustness over a single estimator. -Two families of ensemble methods are usually distinguished: +Two very famous examples of ensemble methods are :ref:`gradient-boosted trees +` and :ref:`random forests `. -- In **averaging methods**, the driving principle is to build several - estimators independently and then to average their predictions. On average, - the combined estimator is usually better than any of the single base - estimator because its variance is reduced. +More generally, ensemble models can be applied to any base learner beyond +trees, in averaging methods such as :ref:`Bagging methods `, +:ref:`model stacking `, or :ref:`Voting `, or in +boosting, as :ref:`AdaBoost `. - **Examples:** :ref:`Bagging methods `, :ref:`Forests of randomized trees `, ... +.. contents:: + :local: + :depth: 1 -- By contrast, in **boosting methods**, base estimators are built sequentially - and one tries to reduce the bias of the combined estimator. The motivation is - to combine several weak models to produce a powerful ensemble. +.. _gradient_boosting: - **Examples:** :ref:`AdaBoost `, :ref:`Gradient Tree Boosting `, ... +Gradient-boosted trees +====================== +`Gradient Tree Boosting `_ +or Gradient Boosted Decision Trees (GBDT) is a generalization +of boosting to arbitrary differentiable loss functions, see the seminal work of +[Friedman2001]_. GBDT is an excellent model for both regression and +classification, in particular for tabular data. -.. _bagging: +.. topic:: :class:`GradientBoostingClassifier` vs :class:`HistGradientBoostingClassifier` -Bagging meta-estimator -====================== + Scikit-learn provides two implementations of gradient-boosted trees: + :class:`HistGradientBoostingClassifier` vs + :class:`GradientBoostingClassifier` for classification, and the + corresponding classes for regression. The former can be **orders of + magnitude faster** than the latter when the number of samples is + larger than tens of thousands of samples. -In ensemble algorithms, bagging methods form a class of algorithms which build -several instances of a black-box estimator on random subsets of the original -training set and then aggregate their individual predictions to form a final -prediction. These methods are used as a way to reduce the variance of a base -estimator (e.g., a decision tree), by introducing randomization into its -construction procedure and then making an ensemble out of it. In many cases, -bagging methods constitute a very simple way to improve with respect to a -single model, without making it necessary to adapt the underlying base -algorithm. As they provide a way to reduce overfitting, bagging methods work -best with strong and complex models (e.g., fully developed decision trees), in -contrast with boosting methods which usually work best with weak models (e.g., -shallow decision trees). + Missing values and categorical data are natively supported by the + Hist... version, removing the need for additional preprocessing such as + imputation. 
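 A minimal illustrative sketch of what this means in practice (not one of the documentation's own examples; it assumes a scikit-learn version recent enough to support ``categorical_features="from_dtype"`` and an optional pandas dependency, and the column names and toy data are made up)::

     >>> import numpy as np
     >>> import pandas as pd
     >>> from sklearn.ensemble import HistGradientBoostingClassifier
     >>> X = pd.DataFrame({
     ...     "color": pd.Categorical(["red", "blue", "red", "blue"]),  # categorical dtype, used as-is
     ...     "size": [1.0, np.nan, 3.0, 4.0],                          # NaN handled natively
     ... })
     >>> y = [0, 1, 0, 1]
     >>> clf = HistGradientBoostingClassifier(
     ...     categorical_features="from_dtype", min_samples_leaf=1
     ... ).fit(X, y)
     >>> clf.predict(X)
     array([0, 1, 0, 1])

 Neither an imputation step nor a separate categorical encoder is needed before fitting.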
-Bagging methods come in many flavours but mostly differ from each other by the -way they draw random subsets of the training set: + :class:`GradientBoostingClassifier` and + :class:`GradientBoostingRegressor`, might be preferred for small sample + sizes since binning may lead to split points that are too approximate + in this setting. - * When random subsets of the dataset are drawn as random subsets of the - samples, then this algorithm is known as Pasting [B1999]_. +.. _histogram_based_gradient_boosting: - * When samples are drawn with replacement, then the method is known as - Bagging [B1996]_. +Histogram-Based Gradient Boosting +---------------------------------- - * When random subsets of the dataset are drawn as random subsets of - the features, then the method is known as Random Subspaces [H1998]_. +Scikit-learn 0.21 introduced two new implementations of +gradient boosted trees, namely :class:`HistGradientBoostingClassifier` +and :class:`HistGradientBoostingRegressor`, inspired by +`LightGBM `__ (See [LightGBM]_). - * Finally, when base estimators are built on subsets of both samples and - features, then the method is known as Random Patches [LG2012]_. +These histogram-based estimators can be **orders of magnitude faster** +than :class:`GradientBoostingClassifier` and +:class:`GradientBoostingRegressor` when the number of samples is larger +than tens of thousands of samples. -In scikit-learn, bagging methods are offered as a unified -:class:`BaggingClassifier` meta-estimator (resp. :class:`BaggingRegressor`), -taking as input a user-specified estimator along with parameters -specifying the strategy to draw random subsets. In particular, ``max_samples`` -and ``max_features`` control the size of the subsets (in terms of samples and -features), while ``bootstrap`` and ``bootstrap_features`` control whether -samples and features are drawn with or without replacement. When using a subset -of the available samples the generalization accuracy can be estimated with the -out-of-bag samples by setting ``oob_score=True``. As an example, the -snippet below illustrates how to instantiate a bagging ensemble of -:class:`KNeighborsClassifier` estimators, each built on random subsets of -50% of the samples and 50% of the features. +They also have built-in support for missing values, which avoids the need +for an imputer. - >>> from sklearn.ensemble import BaggingClassifier - >>> from sklearn.neighbors import KNeighborsClassifier - >>> bagging = BaggingClassifier(KNeighborsClassifier(), - ... max_samples=0.5, max_features=0.5) +These fast estimators first bin the input samples ``X`` into +integer-valued bins (typically 256 bins) which tremendously reduces the +number of splitting points to consider, and allows the algorithm to +leverage integer-based data structures (histograms) instead of relying on +sorted continuous values when building the trees. The API of these +estimators is slightly different, and some of the features from +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` +are not yet supported, for instance some loss functions. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py` + * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` -.. topic:: References +Usage +^^^^^ - .. [B1999] L. Breiman, "Pasting small votes for classification in large - databases and on-line", Machine Learning, 36(1), 85-103, 1999. 
+Most of the parameters are unchanged from +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`. +One exception is the ``max_iter`` parameter that replaces ``n_estimators``, and +controls the number of iterations of the boosting process:: - .. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2), - 123-140, 1996. + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> from sklearn.datasets import make_hastie_10_2 - .. [H1998] T. Ho, "The random subspace method for constructing decision - forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844, - 1998. + >>> X, y = make_hastie_10_2(random_state=0) + >>> X_train, X_test = X[:2000], X[2000:] + >>> y_train, y_test = y[:2000], y[2000:] - .. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches", - Machine Learning and Knowledge Discovery in Databases, 346-361, 2012. + >>> clf = HistGradientBoostingClassifier(max_iter=100).fit(X_train, y_train) + >>> clf.score(X_test, y_test) + 0.8965 -.. _forest: +Available losses for regression are 'squared_error', +'absolute_error', which is less sensitive to outliers, and +'poisson', which is well suited to model counts and frequencies. For +classification, 'log_loss' is the only option. For binary classification it uses the +binary log loss, also known as binomial deviance or binary cross-entropy. For +`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance +and categorical cross-entropy as alternative names. The appropriate loss version is +selected based on :term:`y` passed to :term:`fit`. -Forests of randomized trees -=========================== +The size of the trees can be controlled through the ``max_leaf_nodes``, +``max_depth``, and ``min_samples_leaf`` parameters. -The :mod:`sklearn.ensemble` module includes two averaging algorithms based -on randomized :ref:`decision trees `: the RandomForest algorithm -and the Extra-Trees method. Both algorithms are perturb-and-combine -techniques [B1998]_ specifically designed for trees. This means a diverse -set of classifiers is created by introducing randomness in the classifier -construction. The prediction of the ensemble is given as the averaged -prediction of the individual classifiers. +The number of bins used to bin the data is controlled with the ``max_bins`` +parameter. Using less bins acts as a form of regularization. It is generally +recommended to use as many bins as possible (255), which is the default. -As other classifiers, forest classifiers have to be fitted with two -arrays: a sparse or dense array X of shape ``(n_samples, n_features)`` -holding the training samples, and an array Y of shape ``(n_samples,)`` -holding the target values (class labels) for the training samples:: +The ``l2_regularization`` parameter acts as a regularizer for the loss function, +and corresponds to :math:`\lambda` in the following expression (see equation (2) +in [XGBoost]_): - >>> from sklearn.ensemble import RandomForestClassifier - >>> X = [[0, 0], [1, 1]] - >>> Y = [0, 1] - >>> clf = RandomForestClassifier(n_estimators=10) - >>> clf = clf.fit(X, Y) +.. math:: -Like :ref:`decision trees `, forests of trees also extend to -:ref:`multi-output problems ` (if Y is an array -of shape ``(n_samples, n_outputs)``). 
+ \mathcal{L}(\phi) = \sum_i l(\hat{y}_i, y_i) + \frac12 \sum_k \lambda ||w_k||^2 + +|details-start| +**Details on l2 regularization**: +|details-split| + +It is important to notice that the loss term :math:`l(\hat{y}_i, y_i)` describes +only half of the actual loss function except for the pinball loss and absolute +error. + +The index :math:`k` refers to the k-th tree in the ensemble of trees. In the +case of regression and binary classification, gradient boosting models grow one +tree per iteration, then :math:`k` runs up to `max_iter`. In the case of +multiclass classification problems, the maximal value of the index :math:`k` is +`n_classes` :math:`\times` `max_iter`. + +If :math:`T_k` denotes the number of leaves in the k-th tree, then :math:`w_k` +is a vector of length :math:`T_k`, which contains the leaf values of the form `w += -sum_gradient / (sum_hessian + l2_regularization)` (see equation (5) in +[XGBoost]_). + +The leaf values :math:`w_k` are derived by dividing the sum of the gradients of +the loss function by the combined sum of hessians. Adding the regularization to +the denominator penalizes the leaves with small hessians (flat regions), +resulting in smaller updates. Those :math:`w_k` values contribute then to the +model's prediction for a given input that ends up in the corresponding leaf. The +final prediction is the sum of the base prediction and the contributions from +each tree. The result of that sum is then transformed by the inverse link +function depending on the choice of the loss function (see +:ref:`gradient_boosting_formulation`). + +Notice that the original paper [XGBoost]_ introduces a term :math:`\gamma\sum_k +T_k` that penalizes the number of leaves (making it a smooth version of +`max_leaf_nodes`) not presented here as it is not implemented in scikit-learn; +whereas :math:`\lambda` penalizes the magnitude of the individual tree +predictions before being rescaled by the learning rate, see +:ref:`gradient_boosting_shrinkage`. + +|details-end| -Random Forests --------------- +Note that **early-stopping is enabled by default if the number of samples is +larger than 10,000**. The early-stopping behaviour is controlled via the +``early_stopping``, ``scoring``, ``validation_fraction``, +``n_iter_no_change``, and ``tol`` parameters. It is possible to early-stop +using an arbitrary :term:`scorer`, or just the training or validation loss. +Note that for technical reasons, using a callable as a scorer is significantly slower +than using the loss. By default, early-stopping is performed if there are at least +10,000 samples in the training set, using the validation loss. -In random forests (see :class:`RandomForestClassifier` and -:class:`RandomForestRegressor` classes), each tree in the ensemble is built -from a sample drawn with replacement (i.e., a bootstrap sample) from the -training set. +.. _nan_support_hgbt: -Furthermore, when splitting each node during the construction of a tree, the -best split is found either from all input features or a random subset of size -``max_features``. (See the :ref:`parameter tuning guidelines -` for more details). +Missing values support +^^^^^^^^^^^^^^^^^^^^^^ -The purpose of these two sources of randomness is to decrease the variance of -the forest estimator. Indeed, individual decision trees typically exhibit high -variance and tend to overfit. The injected randomness in forests yield decision -trees with somewhat decoupled prediction errors. By taking an average of those -predictions, some errors can cancel out. 
Random forests achieve a reduced -variance by combining diverse trees, sometimes at the cost of a slight increase -in bias. In practice the variance reduction is often significant hence yielding -an overall better model. +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` have built-in support for missing +values (NaNs). -In contrast to the original publication [B2001]_, the scikit-learn -implementation combines classifiers by averaging their probabilistic -prediction, instead of letting each classifier vote for a single class. +During training, the tree grower learns at each split point whether samples +with missing values should go to the left or right child, based on the +potential gain. When predicting, samples with missing values are assigned to +the left or right child consequently:: -Extremely Randomized Trees --------------------------- + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> import numpy as np -In extremely randomized trees (see :class:`ExtraTreesClassifier` -and :class:`ExtraTreesRegressor` classes), randomness goes one step -further in the way splits are computed. As in random forests, a random -subset of candidate features is used, but instead of looking for the -most discriminative thresholds, thresholds are drawn at random for each -candidate feature and the best of these randomly-generated thresholds is -picked as the splitting rule. This usually allows to reduce the variance -of the model a bit more, at the expense of a slightly greater increase -in bias:: + >>> X = np.array([0, 1, 2, np.nan]).reshape(-1, 1) + >>> y = [0, 0, 1, 1] - >>> from sklearn.model_selection import cross_val_score - >>> from sklearn.datasets import make_blobs - >>> from sklearn.ensemble import RandomForestClassifier - >>> from sklearn.ensemble import ExtraTreesClassifier - >>> from sklearn.tree import DecisionTreeClassifier + >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y) + >>> gbdt.predict(X) + array([0, 0, 1, 1]) - >>> X, y = make_blobs(n_samples=10000, n_features=10, centers=100, - ... random_state=0) +When the missingness pattern is predictive, the splits can be performed on +whether the feature value is missing or not:: - >>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2, - ... random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.98... + >>> X = np.array([0, np.nan, 1, 2, np.nan]).reshape(-1, 1) + >>> y = [0, 1, 0, 0, 1] + >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1, + ... max_depth=2, + ... learning_rate=1, + ... max_iter=1).fit(X, y) + >>> gbdt.predict(X) + array([0, 1, 0, 0, 1]) - >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None, - ... min_samples_split=2, random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.999... +If no missing values were encountered for a given feature during training, +then samples with missing values are mapped to whichever child has the most +samples. - >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, - ... min_samples_split=2, random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() > 0.999 - True +.. topic:: Examples: -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_iris_001.png - :target: ../auto_examples/ensemble/plot_forest_iris.html - :align: center - :scale: 75% + * :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py` -.. _random_forest_parameters: +.. 
_sw_hgbdt: -Parameters ----------- +Sample weight support +^^^^^^^^^^^^^^^^^^^^^ -The main parameters to adjust when using these methods is ``n_estimators`` and -``max_features``. The former is the number of trees in the forest. The larger -the better, but also the longer it will take to compute. In addition, note that -results will stop getting significantly better beyond a critical number of -trees. The latter is the size of the random subsets of features to consider -when splitting a node. The lower the greater the reduction of variance, but -also the greater the increase in bias. Empirical good default values are -``max_features=1.0`` or equivalently ``max_features=None`` (always considering -all features instead of a random subset) for regression problems, and -``max_features="sqrt"`` (using a random subset of size ``sqrt(n_features)``) -for classification tasks (where ``n_features`` is the number of features in -the data). The default value of ``max_features=1.0`` is equivalent to bagged -trees and more randomness can be achieved by setting smaller values (e.g. 0.3 -is a typical default in the literature). Good results are often achieved when -setting ``max_depth=None`` in combination with ``min_samples_split=2`` (i.e., -when fully developing the trees). Bear in mind though that these values are -usually not optimal, and might result in models that consume a lot of RAM. -The best parameter values should always be cross-validated. In addition, note -that in random forests, bootstrap samples are used by default -(``bootstrap=True``) while the default strategy for extra-trees is to use the -whole dataset (``bootstrap=False``). When using bootstrap sampling the -generalization error can be estimated on the left out or out-of-bag samples. -This can be enabled by setting ``oob_score=True``. +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` support sample weights during +:term:`fit`. -.. note:: +The following toy example demonstrates that samples with a sample weight of zero are ignored: - The size of the model with the default parameters is :math:`O( M * N * log (N) )`, - where :math:`M` is the number of trees and :math:`N` is the number of samples. - In order to reduce the size of the model, you can change these parameters: - ``min_samples_split``, ``max_leaf_nodes``, ``max_depth`` and ``min_samples_leaf``. + >>> X = [[1, 0], + ... [1, 0], + ... [1, 0], + ... [0, 1]] + >>> y = [0, 0, 1, 0] + >>> # ignore the first 2 training samples by setting their weight to 0 + >>> sample_weight = [0, 0, 1, 1] + >>> gb = HistGradientBoostingClassifier(min_samples_leaf=1) + >>> gb.fit(X, y, sample_weight=sample_weight) + HistGradientBoostingClassifier(...) + >>> gb.predict([[1, 0]]) + array([1]) + >>> gb.predict_proba([[1, 0]])[0, 1] + 0.99... -Parallelization ---------------- +As you can see, the `[1, 0]` is comfortably classified as `1` since the first +two samples are ignored due to their sample weights. -Finally, this module also features the parallel construction of the trees -and the parallel computation of the predictions through the ``n_jobs`` -parameter. If ``n_jobs=k`` then computations are partitioned into -``k`` jobs, and run on ``k`` cores of the machine. If ``n_jobs=-1`` -then all cores available on the machine are used. Note that because of -inter-process communication overhead, the speedup might not be linear -(i.e., using ``k`` jobs will unfortunately not be ``k`` times as -fast). 
Significant speedup can still be achieved though when building -a large number of trees, or when building a single tree requires a fair -amount of time (e.g., on large datasets). +Implementation detail: taking sample weights into account amounts to +multiplying the gradients (and the hessians) by the sample weights. Note that +the binning stage (specifically the quantiles computation) does not take the +weights into account. -.. topic:: Examples: +.. _categorical_support_gbdt: - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` +Categorical Features Support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. topic:: References +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` have native support for categorical +features: they can consider splits on non-ordered, categorical data. - .. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. +For datasets with categorical features, using the native categorical support +is often better than relying on one-hot encoding +(:class:`~sklearn.preprocessing.OneHotEncoder`), because one-hot encoding +requires more tree depth to achieve equivalent splits. It is also usually +better to rely on the native categorical support rather than to treat +categorical features as continuous (ordinal), which happens for ordinal-encoded +categorical data, since categories are nominal quantities where order does not +matter. - .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. +To enable categorical support, a boolean mask can be passed to the +`categorical_features` parameter, indicating which feature is categorical. In +the following, the first feature will be treated as categorical and the +second feature as numerical:: - * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized - trees", Machine Learning, 63(1), 3-42, 2006. + >>> gbdt = HistGradientBoostingClassifier(categorical_features=[True, False]) -.. _random_forest_feature_importance: +Equivalently, one can pass a list of integers indicating the indices of the +categorical features:: -Feature importance evaluation ------------------------------ + >>> gbdt = HistGradientBoostingClassifier(categorical_features=[0]) -The relative rank (i.e. depth) of a feature used as a decision node in a -tree can be used to assess the relative importance of that feature with -respect to the predictability of the target variable. Features used at -the top of the tree contribute to the final prediction decision of a -larger fraction of the input samples. The **expected fraction of the -samples** they contribute to can thus be used as an estimate of the -**relative importance of the features**. In scikit-learn, the fraction of -samples a feature contributes to is combined with the decrease in impurity -from splitting them to create a normalized estimate of the predictive power -of that feature. +When the input is a DataFrame, it is also possible to pass a list of column +names:: -By **averaging** the estimates of predictive ability over several randomized -trees one can **reduce the variance** of such an estimate and use it -for feature selection. This is known as the mean decrease in impurity, or MDI. -Refer to [L2014]_ for more information on MDI and feature importance -evaluation with Random Forests. + >>> gbdt = HistGradientBoostingClassifier(categorical_features=["site", "manufacturer"]) -.. 
warning:: +Finally, when the input is a DataFrame we can use +`categorical_features="from_dtype"` in which case all columns with a categorical +`dtype` will be treated as categorical features. - The impurity-based feature importances computed on tree-based models suffer - from two flaws that can lead to misleading conclusions. First they are - computed on statistics derived from the training dataset and therefore **do - not necessarily inform us on which features are most important to make good - predictions on held-out dataset**. Secondly, **they favor high cardinality - features**, that is features with many unique values. - :ref:`permutation_importance` is an alternative to impurity-based feature - importance that does not suffer from these flaws. These two methods of - obtaining feature importance are explored in: - :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`. +The cardinality of each categorical feature must be less than the `max_bins` +parameter. For an example using histogram-based gradient boosting on categorical +features, see +:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`. -The following example shows a color-coded representation of the relative -importances of each individual pixel for a face recognition task using -a :class:`ExtraTreesClassifier` model. +If there are missing values during training, the missing values will be +treated as a proper category. If there are no missing values during training, +then at prediction time, missing values are mapped to the child node that has +the most samples (just like for continuous features). When predicting, +categories that were not seen during fit time will be treated as missing +values. -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_importances_faces_001.png - :target: ../auto_examples/ensemble/plot_forest_importances_faces.html - :align: center - :scale: 75 +|details-start| +**Split finding with categorical features**: +|details-split| -In practice those estimates are stored as an attribute named -``feature_importances_`` on the fitted model. This is an array with shape -``(n_features,)`` whose values are positive and sum to 1.0. The higher -the value, the more important is the contribution of the matching feature -to the prediction function. +The canonical way of considering +categorical splits in a tree is to consider +all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of +categories. This can quickly become prohibitive when :math:`K` is large. +Fortunately, since gradient boosting trees are always regression trees (even +for classification problems), there exist a faster strategy that can yield +equivalent splits. First, the categories of a feature are sorted according to +the variance of the target, for each category `k`. Once the categories are +sorted, one can consider *continuous partitions*, i.e. treat the categories +as if they were ordered continuous values (see Fisher [Fisher1958]_ for a +formal proof). As a result, only :math:`K - 1` splits need to be considered +instead of :math:`2^{K - 1} - 1`. The initial sorting is a +:math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of +:math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`. -.. topic:: Examples: +|details-end| - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py` +.. topic:: Examples: -.. 
topic:: References + * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py` - .. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to - Practice" <1407.7502>`, - PhD Thesis, U. of Liege, 2014. +.. _monotonic_cst_gbdt: -.. _random_trees_embedding: +Monotonic Constraints +^^^^^^^^^^^^^^^^^^^^^ -Totally Random Trees Embedding ------------------------------- +Depending on the problem at hand, you may have prior knowledge indicating +that a given feature should in general have a positive (or negative) effect +on the target value. For example, all else being equal, a higher credit +score should increase the probability of getting approved for a loan. +Monotonic constraints allow you to incorporate such prior knowledge into the +model. -:class:`RandomTreesEmbedding` implements an unsupervised transformation of the -data. Using a forest of completely random trees, :class:`RandomTreesEmbedding` -encodes the data by the indices of the leaves a data point ends up in. This -index is then encoded in a one-of-K manner, leading to a high dimensional, -sparse binary coding. -This coding can be computed very efficiently and can then be used as a basis -for other learning tasks. -The size and sparsity of the code can be influenced by choosing the number of -trees and the maximum depth per tree. For each tree in the ensemble, the coding -contains one entry of one. The size of the coding is at most ``n_estimators * 2 -** max_depth``, the maximum number of leaves in the forest. +For a predictor :math:`F` with two features: -As neighboring data points are more likely to lie within the same leaf of a -tree, the transformation performs an implicit, non-parametric density -estimation. +- a **monotonic increase constraint** is a constraint of the form: -.. topic:: Examples: + .. math:: + x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2) - * :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` +- a **monotonic decrease constraint** is a constraint of the form: - * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear - dimensionality reduction techniques on handwritten digits. + .. math:: + x_1 \leq x_1' \implies F(x_1, x_2) \geq F(x_1', x_2) - * :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares - supervised and unsupervised tree based feature transformations. +You can specify a monotonic constraint on each feature using the +`monotonic_cst` parameter. For each feature, a value of 0 indicates no +constraint, while 1 and -1 indicate a monotonic increase and +monotonic decrease constraint, respectively:: -.. seealso:: + >>> from sklearn.ensemble import HistGradientBoostingRegressor - :ref:`manifold` techniques can also be useful to derive non-linear - representations of feature space, also these approaches focus also on - dimensionality reduction. + ... # monotonic increase, monotonic decrease, and no constraint on the 3 features + >>> gbdt = HistGradientBoostingRegressor(monotonic_cst=[1, -1, 0]) +In a binary classification context, imposing a monotonic increase (decrease) constraint means that higher values of the feature are supposed +to have a positive (negative) effect on the probability of samples +to belong to the positive class. -.. _adaboost: +Nevertheless, monotonic constraints only marginally constrain feature effects on the output. +For instance, monotonic increase and decrease constraints cannot be used to enforce the +following modelling constraint: -AdaBoost -======== +.. 
math:: + x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2') -The module :mod:`sklearn.ensemble` includes the popular boosting algorithm -AdaBoost, introduced in 1995 by Freund and Schapire [FS1995]_. +Also, monotonic constraints are not supported for multiclass classification. -The core principle of AdaBoost is to fit a sequence of weak learners (i.e., -models that are only slightly better than random guessing, such as small -decision trees) on repeatedly modified versions of the data. The predictions -from all of them are then combined through a weighted majority vote (or sum) to -produce the final prediction. The data modifications at each so-called boosting -iteration consist of applying weights :math:`w_1`, :math:`w_2`, ..., :math:`w_N` -to each of the training samples. Initially, those weights are all set to -:math:`w_i = 1/N`, so that the first step simply trains a weak learner on the -original data. For each successive iteration, the sample weights are -individually modified and the learning algorithm is reapplied to the reweighted -data. At a given step, those training examples that were incorrectly predicted -by the boosted model induced at the previous step have their weights increased, -whereas the weights are decreased for those that were predicted correctly. As -iterations proceed, examples that are difficult to predict receive -ever-increasing influence. Each subsequent weak learner is thereby forced to -concentrate on the examples that are missed by the previous ones in the sequence -[HTF]_. +.. note:: + Since categories are unordered quantities, it is not possible to enforce + monotonic constraints on categorical features. -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_adaboost_hastie_10_2_001.png - :target: ../auto_examples/ensemble/plot_adaboost_hastie_10_2.html - :align: center - :scale: 75 +.. topic:: Examples: -AdaBoost can be used both for classification and regression problems: + * :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_hgbt_regression.py` - - For multi-class classification, :class:`AdaBoostClassifier` implements - AdaBoost-SAMME and AdaBoost-SAMME.R [ZZRH2009]_. +.. _interaction_cst_hgbt: - - For regression, :class:`AdaBoostRegressor` implements AdaBoost.R2 [D1997]_. +Interaction constraints +^^^^^^^^^^^^^^^^^^^^^^^ -Usage ------ +A priori, the histogram gradient boosted trees are allowed to use any feature +to split a node into child nodes. This creates so-called interactions between +features, i.e. usage of different features as splits along a branch. Sometimes, +one wants to restrict the possible interactions, see [Mayer2022]_. This can be +done by the parameter ``interaction_cst``, where one can specify the indices +of features that are allowed to interact. +For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]`` +forbids all interactions. +The constraints ``[{0, 1}, {1, 2}]`` specify two groups of possibly +interacting features. Features 0 and 1 may interact with each other, as well +as features 1 and 2. But note that features 0 and 2 are forbidden to interact. +The following depicts a tree and the possible splits of the tree: -The following example shows how to fit an AdaBoost classifier with 100 weak -learners:: +.. 
code-block:: none - >>> from sklearn.model_selection import cross_val_score - >>> from sklearn.datasets import load_iris - >>> from sklearn.ensemble import AdaBoostClassifier + 1 <- Both constraint groups could be applied from now on + / \ + 1 2 <- Left split still fulfills both constraint groups. + / \ / \ Right split at feature 2 has only group {1, 2} from now on. - >>> X, y = load_iris(return_X_y=True) - >>> clf = AdaBoostClassifier(n_estimators=100) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.9... +LightGBM uses the same logic for overlapping groups. -The number of weak learners is controlled by the parameter ``n_estimators``. The -``learning_rate`` parameter controls the contribution of the weak learners in -the final combination. By default, weak learners are decision stumps. Different -weak learners can be specified through the ``estimator`` parameter. -The main parameters to tune to obtain good results are ``n_estimators`` and -the complexity of the base estimators (e.g., its depth ``max_depth`` or -minimum required number of samples to consider a split ``min_samples_split``). +Note that features not listed in ``interaction_cst`` are automatically +assigned an interaction group for themselves. With again 3 features, this +means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_hastie_10_2.py` compares the - classification error of a decision stump, decision tree, and a boosted - decision stump using AdaBoost-SAMME and AdaBoost-SAMME.R. - - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance - of AdaBoost-SAMME and AdaBoost-SAMME.R on a multi-class problem. - - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary - and decision function values for a non-linearly separable two-class problem - using AdaBoost-SAMME. - - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression - with the AdaBoost.R2 algorithm. + * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` .. topic:: References - .. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of - On-Line Learning and an Application to Boosting", 1997. + .. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. + 2022. :doi:`Machine Learning Applications to Land and Structure Valuation + <10.3390/jrfm15050193>`. + Journal of Risk and Financial Management 15, no. 5: 193 - .. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", - 2009. +Low-level parallelism +^^^^^^^^^^^^^^^^^^^^^ - .. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. - .. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of - Statistical Learning Ed. 2", Springer, 2009. +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` use OpenMP +for parallelization through Cython. For more details on how to control the +number of threads, please refer to our :ref:`parallelism` notes. +The following parts are parallelized: -.. 
_gradient_boosting: +- mapping samples from real values to integer-valued bins (finding the bin + thresholds is however sequential) +- building histograms is parallelized over features +- finding the best split point at a node is parallelized over features +- during fit, mapping samples into the left and right children is + parallelized over samples +- gradient and hessians computations are parallelized over samples +- predicting is parallelized over samples -Gradient Tree Boosting -====================== +.. _Why_it's_faster: -`Gradient Tree Boosting `_ -or Gradient Boosted Decision Trees (GBDT) is a generalization -of boosting to arbitrary differentiable loss functions, see the seminal work of -[Friedman2001]_. GBDT is an accurate and effective off-the-shelf procedure that can be -used for both regression and classification problems in a -variety of areas including Web search ranking and ecology. +Why it's faster +^^^^^^^^^^^^^^^ -The module :mod:`sklearn.ensemble` provides methods -for both classification and regression via gradient boosted decision -trees. +The bottleneck of a gradient boosting procedure is building the decision +trees. Building a traditional decision tree (as in the other GBDTs +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`) +requires sorting the samples at each node (for +each feature). Sorting is needed so that the potential gain of a split point +can be computed efficiently. Splitting a single node has thus a complexity +of :math:`\mathcal{O}(n_\text{features} \times n \log(n))` where :math:`n` +is the number of samples at the node. -.. note:: +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor`, in contrast, do not require sorting the +feature values and instead use a data-structure called a histogram, where the +samples are implicitly ordered. Building a histogram has a +:math:`\mathcal{O}(n)` complexity, so the node splitting procedure has a +:math:`\mathcal{O}(n_\text{features} \times n)` complexity, much smaller +than the previous one. In addition, instead of considering :math:`n` split +points, we consider only ``max_bins`` split points, which might be much +smaller. + +In order to build histograms, the input data `X` needs to be binned into +integer-valued bins. This binning procedure does require sorting the feature +values, but it only happens once at the very beginning of the boosting process +(not at each node, like in :class:`GradientBoostingClassifier` and +:class:`GradientBoostingRegressor`). - Scikit-learn 0.21 introduces two new implementations of - gradient boosting trees, namely :class:`HistGradientBoostingClassifier` - and :class:`HistGradientBoostingRegressor`, inspired by - `LightGBM `__ (See [LightGBM]_). +Finally, many parts of the implementation of +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` are parallelized. - These histogram-based estimators can be **orders of magnitude faster** - than :class:`GradientBoostingClassifier` and - :class:`GradientBoostingRegressor` when the number of samples is larger - than tens of thousands of samples. +.. topic:: References + + .. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree + Boosting System" <1603.02754>` - They also have built-in support for missing values, which avoids the need - for an imputer. + .. [LightGBM] Ke et. al. 
`"LightGBM: A Highly Efficient Gradient + BoostingDecision Tree" `_ - These estimators are described in more detail below in - :ref:`histogram_based_gradient_boosting`. + .. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" + `_ + Journal of the American Statistical Association, 53, 789-798. - The following guide focuses on :class:`GradientBoostingClassifier` and - :class:`GradientBoostingRegressor`, which might be preferred for small - sample sizes since binning may lead to split points that are too approximate - in this setting. +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` +---------------------------------------------------------------------------- + The usage and the parameters of :class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` are described below. The 2 most important parameters of these estimators are `n_estimators` and `learning_rate`. -Classification ---------------- +|details-start| +**Classification** +|details-split| :class:`GradientBoostingClassifier` supports both binary and multi-class classification. @@ -533,8 +540,11 @@ depth via ``max_depth`` or by setting the number of leaf nodes via :class:`HistGradientBoostingClassifier` as an alternative to :class:`GradientBoostingClassifier` . -Regression ----------- +|details-end| + +|details-start| +**Regression** +|details-split| :class:`GradientBoostingRegressor` supports a number of :ref:`different loss functions ` @@ -564,8 +574,8 @@ with least squares loss and 500 base learners to the diabetes dataset (:func:`sklearn.datasets.load_diabetes`). The plot shows the train and test error at each iteration. The train error at each iteration is stored in the -:attr:`~GradientBoostingRegressor.train_score_` attribute -of the gradient boosting model. The test error at each iterations can be obtained +`train_score_` attribute of the gradient boosting model. +The test error at each iterations can be obtained via the :meth:`~GradientBoostingRegressor.staged_predict` method which returns a generator that yields the predictions at each stage. Plots like these can be used to determine the optimal number of trees (i.e. ``n_estimators``) by early stopping. @@ -575,6 +585,8 @@ to determine the optimal number of trees (i.e. ``n_estimators``) by early stoppi :align: center :scale: 75 +|details-end| + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_regression.py` @@ -583,7 +595,7 @@ to determine the optimal number of trees (i.e. ``n_estimators``) by early stoppi .. _gradient_boosting_warm_start: Fitting additional weak-learners --------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Both :class:`GradientBoostingRegressor` and :class:`GradientBoostingClassifier` support ``warm_start=True`` which allows you to add more estimators to an already @@ -591,7 +603,22 @@ fitted model. :: - >>> _ = est.set_params(n_estimators=200, warm_start=True) # set warm_start and new nr of trees + >>> import numpy as np + >>> from sklearn.metrics import mean_squared_error + >>> from sklearn.datasets import make_friedman1 + >>> from sklearn.ensemble import GradientBoostingRegressor + + >>> X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0) + >>> X_train, X_test = X[:200], X[200:] + >>> y_train, y_test = y[:200], y[200:] + >>> est = GradientBoostingRegressor( + ... n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, + ... loss='squared_error' + ... 
) + >>> est = est.fit(X_train, y_train) # fit with 100 trees + >>> mean_squared_error(y_test, est.predict(X_test)) + 5.00... + >>> _ = est.set_params(n_estimators=200, warm_start=True) # set warm_start and increase num of trees >>> _ = est.fit(X_train, y_train) # fit additional 100 trees to est >>> mean_squared_error(y_test, est.predict(X_test)) 3.84... @@ -599,7 +626,7 @@ fitted model. .. _gradient_boosting_tree_size: Controlling the tree size -------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^ The size of the regression tree base learners defines the level of variable interactions that can be captured by the gradient boosting model. In general, @@ -625,21 +652,24 @@ The parameter ``max_leaf_nodes`` corresponds to the variable ``J`` in the chapter on gradient boosting in [Friedman2001]_ and is related to the parameter ``interaction.depth`` in R's gbm package where ``max_leaf_nodes == interaction.depth + 1`` . +.. _gradient_boosting_formulation: + Mathematical formulation -------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^ We first present GBRT for regression, and then detail the classification case. -Regression -^^^^^^^^^^ +|details-start| +**Regression** +|details-split| GBRT regressors are additive models whose prediction :math:`\hat{y}_i` for a given input :math:`x_i` is of the following form: - .. math:: +.. math:: - \hat{y}_i = F_M(x_i) = \sum_{m=1}^{M} h_m(x_i) + \hat{y}_i = F_M(x_i) = \sum_{m=1}^{M} h_m(x_i) where the :math:`h_m` are estimators called *weak learners* in the context of boosting. Gradient Tree Boosting uses :ref:`decision tree regressors @@ -648,17 +678,17 @@ of boosting. Gradient Tree Boosting uses :ref:`decision tree regressors Similar to other boosting algorithms, a GBRT is built in a greedy fashion: - .. math:: +.. math:: - F_m(x) = F_{m-1}(x) + h_m(x), + F_m(x) = F_{m-1}(x) + h_m(x), where the newly added tree :math:`h_m` is fitted in order to minimize a sum of losses :math:`L_m`, given the previous ensemble :math:`F_{m-1}`: - .. math:: +.. math:: - h_m = \arg\min_{h} L_m = \arg\min_{h} \sum_{i=1}^{n} - l(y_i, F_{m-1}(x_i) + h(x_i)), + h_m = \arg\min_{h} L_m = \arg\min_{h} \sum_{i=1}^{n} + l(y_i, F_{m-1}(x_i) + h(x_i)), where :math:`l(y_i, F(x_i))` is defined by the `loss` parameter, detailed in the next section. @@ -671,12 +701,12 @@ argument. Using a first-order Taylor approximation, the value of :math:`l` can be approximated as follows: - .. math:: +.. math:: - l(y_i, F_{m-1}(x_i) + h_m(x_i)) \approx - l(y_i, F_{m-1}(x_i)) - + h_m(x_i) - \left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} \right]_{F=F_{m - 1}}. + l(y_i, F_{m-1}(x_i) + h_m(x_i)) \approx + l(y_i, F_{m-1}(x_i)) + + h_m(x_i) + \left[ \frac{\partial l(y_i, F(x_i))}{\partial F(x_i)} \right]_{F=F_{m - 1}}. .. note:: @@ -693,9 +723,9 @@ differentiable. We will denote it by :math:`g_i`. Removing the constant terms, we have: - .. math:: +.. math:: - h_m \approx \arg\min_{h} \sum_{i=1}^{n} h(x_i) g_i + h_m \approx \arg\min_{h} \sum_{i=1}^{n} h(x_i) g_i This is minimized if :math:`h(x_i)` is fitted to predict a value that is proportional to the negative gradient :math:`-g_i`. Therefore, at each @@ -714,8 +744,11 @@ space. update is loss-dependent: for the absolute error loss, the value of a leaf is updated to the median of the samples in that leaf. -Classification -^^^^^^^^^^^^^^ +|details-end| + +|details-start| +**Classification** +|details-split| Gradient boosting for classification is very similar to the regression case. 
However, the sum of the trees :math:`F_M(x_i) = \sum_m h_m(x_i)` is not @@ -736,53 +769,64 @@ still a regressor, not a classifier. This is because the sub-estimators are trained to predict (negative) *gradients*, which are always continuous quantities. +|details-end| + .. _gradient_boosting_loss: Loss Functions --------------- +^^^^^^^^^^^^^^ The following loss functions are supported and can be specified using the parameter ``loss``: - * Regression - - * Squared error (``'squared_error'``): The natural choice for regression - due to its superior computational properties. The initial model is - given by the mean of the target values. - * Absolute error (``'absolute_error'``): A robust loss function for - regression. The initial model is given by the median of the - target values. - * Huber (``'huber'``): Another robust loss function that combines - least squares and least absolute deviation; use ``alpha`` to - control the sensitivity with regards to outliers (see [Friedman2001]_ for - more details). - * Quantile (``'quantile'``): A loss function for quantile regression. - Use ``0 < alpha < 1`` to specify the quantile. This loss function - can be used to create prediction intervals - (see :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`). - - * Classification - - * Binary log-loss (``'log-loss'``): The binomial - negative log-likelihood loss function for binary classification. It provides - probability estimates. The initial model is given by the - log odds-ratio. - * Multi-class log-loss (``'log-loss'``): The multinomial - negative log-likelihood loss function for multi-class classification with - ``n_classes`` mutually exclusive classes. It provides - probability estimates. The initial model is given by the - prior probability of each class. At each iteration ``n_classes`` - regression trees have to be constructed which makes GBRT rather - inefficient for data sets with a large number of classes. - * Exponential loss (``'exponential'``): The same loss function - as :class:`AdaBoostClassifier`. Less robust to mislabeled - examples than ``'log-loss'``; can only be used for binary - classification. +|details-start| +**Regression** +|details-split| + + * Squared error (``'squared_error'``): The natural choice for regression + due to its superior computational properties. The initial model is + given by the mean of the target values. + * Absolute error (``'absolute_error'``): A robust loss function for + regression. The initial model is given by the median of the + target values. + * Huber (``'huber'``): Another robust loss function that combines + least squares and least absolute deviation; use ``alpha`` to + control the sensitivity with regards to outliers (see [Friedman2001]_ for + more details). + * Quantile (``'quantile'``): A loss function for quantile regression. + Use ``0 < alpha < 1`` to specify the quantile. This loss function + can be used to create prediction intervals + (see :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`). + +|details-end| + + +|details-start| +**Classification** +|details-split| + + * Binary log-loss (``'log-loss'``): The binomial + negative log-likelihood loss function for binary classification. It provides + probability estimates. The initial model is given by the + log odds-ratio. + * Multi-class log-loss (``'log-loss'``): The multinomial + negative log-likelihood loss function for multi-class classification with + ``n_classes`` mutually exclusive classes. It provides + probability estimates. 
The initial model is given by the + prior probability of each class. At each iteration ``n_classes`` + regression trees have to be constructed which makes GBRT rather + inefficient for data sets with a large number of classes. + * Exponential loss (``'exponential'``): The same loss function + as :class:`AdaBoostClassifier`. Less robust to mislabeled + examples than ``'log-loss'``; can only be used for binary + classification. + +|details-end| .. _gradient_boosting_shrinkage: Shrinkage via learning rate ---------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^ [Friedman2001]_ proposed a simple regularization strategy that scales the contribution of each weak learner by a constant factor :math:`\nu`: @@ -801,12 +845,14 @@ of ``learning_rate`` require larger numbers of weak learners to maintain a constant training error. Empirical evidence suggests that small values of ``learning_rate`` favor better test error. [HTF]_ recommend to set the learning rate to a small constant -(e.g. ``learning_rate <= 0.1``) and choose ``n_estimators`` by early -stopping. For a more detailed discussion of the interaction between +(e.g. ``learning_rate <= 0.1``) and choose ``n_estimators`` large enough +that early stopping applies, +see :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_early_stopping.py` +for a more detailed discussion of the interaction between ``learning_rate`` and ``n_estimators`` see [R2007]_. Subsampling ------------ +^^^^^^^^^^^^ [Friedman2002]_ proposed stochastic gradient boosting, which combines gradient boosting with bootstrap averaging (bagging). At each iteration @@ -835,10 +881,9 @@ parameter. Stochastic gradient boosting allows to compute out-of-bag estimates of the test deviance by computing the improvement in deviance on the examples that are not included in the bootstrap sample (i.e. the out-of-bag examples). -The improvements are stored in the attribute -:attr:`~GradientBoostingRegressor.oob_improvement_`. ``oob_improvement_[i]`` holds -the improvement in terms of the loss on the OOB samples if you add the i-th stage -to the current predictions. +The improvements are stored in the attribute `oob_improvement_`. +``oob_improvement_[i]`` holds the improvement in terms of the loss on the OOB samples +if you add the i-th stage to the current predictions. Out-of-bag estimates can be used for model selection, for example to determine the optimal number of iterations. OOB estimates are usually very pessimistic thus we recommend to use cross-validation instead and only use OOB if cross-validation @@ -851,7 +896,7 @@ is too time consuming. * :ref:`sphx_glr_auto_examples_ensemble_plot_ensemble_oob.py` Interpretation with feature importance --------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Individual decision trees can be interpreted easily by simply visualizing the tree structure. Gradient boosting models, however, @@ -908,373 +953,408 @@ based on permutation of the features. .. [R2007] G. Ridgeway (2006). `Generalized Boosted Models: A guide to the gbm package `_ -.. _histogram_based_gradient_boosting: - -Histogram-Based Gradient Boosting -================================= +.. _forest: -Scikit-learn 0.21 introduced two new implementations of -gradient boosting trees, namely :class:`HistGradientBoostingClassifier` -and :class:`HistGradientBoostingRegressor`, inspired by -`LightGBM `__ (See [LightGBM]_). 
+Random forests and other randomized tree ensembles +=================================================== -These histogram-based estimators can be **orders of magnitude faster** -than :class:`GradientBoostingClassifier` and -:class:`GradientBoostingRegressor` when the number of samples is larger -than tens of thousands of samples. +The :mod:`sklearn.ensemble` module includes two averaging algorithms based +on randomized :ref:`decision trees `: the RandomForest algorithm +and the Extra-Trees method. Both algorithms are perturb-and-combine +techniques [B1998]_ specifically designed for trees. This means a diverse +set of classifiers is created by introducing randomness in the classifier +construction. The prediction of the ensemble is given as the averaged +prediction of the individual classifiers. -They also have built-in support for missing values, which avoids the need -for an imputer. +As other classifiers, forest classifiers have to be fitted with two +arrays: a sparse or dense array X of shape ``(n_samples, n_features)`` +holding the training samples, and an array Y of shape ``(n_samples,)`` +holding the target values (class labels) for the training samples:: -These fast estimators first bin the input samples ``X`` into -integer-valued bins (typically 256 bins) which tremendously reduces the -number of splitting points to consider, and allows the algorithm to -leverage integer-based data structures (histograms) instead of relying on -sorted continuous values when building the trees. The API of these -estimators is slightly different, and some of the features from -:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` -are not yet supported, for instance some loss functions. + >>> from sklearn.ensemble import RandomForestClassifier + >>> X = [[0, 0], [1, 1]] + >>> Y = [0, 1] + >>> clf = RandomForestClassifier(n_estimators=10) + >>> clf = clf.fit(X, Y) -.. topic:: Examples: +Like :ref:`decision trees `, forests of trees also extend to +:ref:`multi-output problems ` (if Y is an array +of shape ``(n_samples, n_outputs)``). - * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` +Random Forests +-------------- -Usage ------ +In random forests (see :class:`RandomForestClassifier` and +:class:`RandomForestRegressor` classes), each tree in the ensemble is built +from a sample drawn with replacement (i.e., a bootstrap sample) from the +training set. -Most of the parameters are unchanged from -:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`. -One exception is the ``max_iter`` parameter that replaces ``n_estimators``, and -controls the number of iterations of the boosting process:: +Furthermore, when splitting each node during the construction of a tree, the +best split is found through an exhaustive search of the features values of +either all input features or a random subset of size ``max_features``. +(See the :ref:`parameter tuning guidelines ` for more details.) - >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> from sklearn.datasets import make_hastie_10_2 +The purpose of these two sources of randomness is to decrease the variance of +the forest estimator. Indeed, individual decision trees typically exhibit high +variance and tend to overfit. The injected randomness in forests yield decision +trees with somewhat decoupled prediction errors. By taking an average of those +predictions, some errors can cancel out. 
Random forests achieve a reduced +variance by combining diverse trees, sometimes at the cost of a slight increase +in bias. In practice the variance reduction is often significant hence yielding +an overall better model. - >>> X, y = make_hastie_10_2(random_state=0) - >>> X_train, X_test = X[:2000], X[2000:] - >>> y_train, y_test = y[:2000], y[2000:] +In contrast to the original publication [B2001]_, the scikit-learn +implementation combines classifiers by averaging their probabilistic +prediction, instead of letting each classifier vote for a single class. - >>> clf = HistGradientBoostingClassifier(max_iter=100).fit(X_train, y_train) - >>> clf.score(X_test, y_test) - 0.8965 +A competitive alternative to random forests are +:ref:`histogram_based_gradient_boosting` (HGBT) models: + +- Building trees: Random forests typically rely on deep trees (that overfit + individually) which uses much computational resources, as they require + several splittings and evaluations of candidate splits. Boosting models + build shallow trees (that underfit individually) which are faster to fit + and predict. + +- Sequential boosting: In HGBT, the decision trees are built sequentially, + where each tree is trained to correct the errors made by the previous ones. + This allows them to iteratively improve the model's performance using + relatively few trees. In contrast, random forests use a majority vote to + predict the outcome, which can require a larger number of trees to achieve + the same level of accuracy. + +- Efficient binning: HGBT uses an efficient binning algorithm that can handle + large datasets with a high number of features. The binning algorithm can + pre-process the data to speed up the subsequent tree construction (see + :ref:`Why it's faster `). In contrast, the scikit-learn + implementation of random forests does not use binning and relies on exact + splitting, which can be computationally expensive. + +Overall, the computational cost of HGBT versus RF depends on the specific +characteristics of the dataset and the modeling task. It's a good idea +to try both models and compare their performance and computational efficiency +on your specific problem to determine which model is the best fit. -Available losses for regression are 'squared_error', -'absolute_error', which is less sensitive to outliers, and -'poisson', which is well suited to model counts and frequencies. For -classification, 'log_loss' is the only option. For binary classification it uses the -binary log loss, also kown as binomial deviance or binary cross-entropy. For -`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance -and categorical cross-entropy as alternative names. The appropriate loss version is -selected based on :term:`y` passed to :term:`fit`. +.. topic:: Examples: -The size of the trees can be controlled through the ``max_leaf_nodes``, -``max_depth``, and ``min_samples_leaf`` parameters. + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` -The number of bins used to bin the data is controlled with the ``max_bins`` -parameter. Using less bins acts as a form of regularization. It is -generally recommended to use as many bins as possible, which is the default. +Extremely Randomized Trees +-------------------------- -The ``l2_regularization`` parameter is a regularizer on the loss function and -corresponds to :math:`\lambda` in equation (2) of [XGBoost]_. 
+In extremely randomized trees (see :class:`ExtraTreesClassifier` +and :class:`ExtraTreesRegressor` classes), randomness goes one step +further in the way splits are computed. As in random forests, a random +subset of candidate features is used, but instead of looking for the +most discriminative thresholds, thresholds are drawn at random for each +candidate feature and the best of these randomly-generated thresholds is +picked as the splitting rule. This usually allows to reduce the variance +of the model a bit more, at the expense of a slightly greater increase +in bias:: -Note that **early-stopping is enabled by default if the number of samples is -larger than 10,000**. The early-stopping behaviour is controlled via the -``early_stopping``, ``scoring``, ``validation_fraction``, -``n_iter_no_change``, and ``tol`` parameters. It is possible to early-stop -using an arbitrary :term:`scorer`, or just the training or validation loss. -Note that for technical reasons, using a scorer is significantly slower than -using the loss. By default, early-stopping is performed if there are at least -10,000 samples in the training set, using the validation loss. + >>> from sklearn.model_selection import cross_val_score + >>> from sklearn.datasets import make_blobs + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.ensemble import ExtraTreesClassifier + >>> from sklearn.tree import DecisionTreeClassifier -Missing values support ----------------------- + >>> X, y = make_blobs(n_samples=10000, n_features=10, centers=100, + ... random_state=0) -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have built-in support for missing -values (NaNs). + >>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2, + ... random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.98... -During training, the tree grower learns at each split point whether samples -with missing values should go to the left or right child, based on the -potential gain. When predicting, samples with missing values are assigned to -the left or right child consequently:: + >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None, + ... min_samples_split=2, random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.999... - >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> import numpy as np + >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, + ... min_samples_split=2, random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() > 0.999 + True - >>> X = np.array([0, 1, 2, np.nan]).reshape(-1, 1) - >>> y = [0, 0, 1, 1] +.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_iris_001.png + :target: ../auto_examples/ensemble/plot_forest_iris.html + :align: center + :scale: 75% - >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y) - >>> gbdt.predict(X) - array([0, 0, 1, 1]) +.. _random_forest_parameters: -When the missingness pattern is predictive, the splits can be done on -whether the feature value is missing or not:: +Parameters +---------- - >>> X = np.array([0, np.nan, 1, 2, np.nan]).reshape(-1, 1) - >>> y = [0, 1, 0, 0, 1] - >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1, - ... max_depth=2, - ... learning_rate=1, - ... max_iter=1).fit(X, y) - >>> gbdt.predict(X) - array([0, 1, 0, 0, 1]) +The main parameters to adjust when using these methods is ``n_estimators`` and +``max_features``. 
The former is the number of trees in the forest. The larger +the better, but also the longer it will take to compute. In addition, note that +results will stop getting significantly better beyond a critical number of +trees. The latter is the size of the random subsets of features to consider +when splitting a node. The lower the greater the reduction of variance, but +also the greater the increase in bias. Empirical good default values are +``max_features=1.0`` or equivalently ``max_features=None`` (always considering +all features instead of a random subset) for regression problems, and +``max_features="sqrt"`` (using a random subset of size ``sqrt(n_features)``) +for classification tasks (where ``n_features`` is the number of features in +the data). The default value of ``max_features=1.0`` is equivalent to bagged +trees and more randomness can be achieved by setting smaller values (e.g. 0.3 +is a typical default in the literature). Good results are often achieved when +setting ``max_depth=None`` in combination with ``min_samples_split=2`` (i.e., +when fully developing the trees). Bear in mind though that these values are +usually not optimal, and might result in models that consume a lot of RAM. +The best parameter values should always be cross-validated. In addition, note +that in random forests, bootstrap samples are used by default +(``bootstrap=True``) while the default strategy for extra-trees is to use the +whole dataset (``bootstrap=False``). When using bootstrap sampling the +generalization error can be estimated on the left out or out-of-bag samples. +This can be enabled by setting ``oob_score=True``. -If no missing values were encountered for a given feature during training, -then samples with missing values are mapped to whichever child has the most -samples. +.. note:: -.. _sw_hgbdt: + The size of the model with the default parameters is :math:`O( M * N * log (N) )`, + where :math:`M` is the number of trees and :math:`N` is the number of samples. + In order to reduce the size of the model, you can change these parameters: + ``min_samples_split``, ``max_leaf_nodes``, ``max_depth`` and ``min_samples_leaf``. -Sample weight support ---------------------- +Parallelization +--------------- -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` sample support weights during -:term:`fit`. +Finally, this module also features the parallel construction of the trees +and the parallel computation of the predictions through the ``n_jobs`` +parameter. If ``n_jobs=k`` then computations are partitioned into +``k`` jobs, and run on ``k`` cores of the machine. If ``n_jobs=-1`` +then all cores available on the machine are used. Note that because of +inter-process communication overhead, the speedup might not be linear +(i.e., using ``k`` jobs will unfortunately not be ``k`` times as +fast). Significant speedup can still be achieved though when building +a large number of trees, or when building a single tree requires a fair +amount of time (e.g., on large datasets). -The following toy example demonstrates how the model ignores the samples with -zero sample weights: +.. topic:: Examples: - >>> X = [[1, 0], - ... [1, 0], - ... [1, 0], - ... [0, 1]] - >>> y = [0, 0, 1, 0] - >>> # ignore the first 2 training samples by setting their weight to 0 - >>> sample_weight = [0, 0, 1, 1] - >>> gb = HistGradientBoostingClassifier(min_samples_leaf=1) - >>> gb.fit(X, y, sample_weight=sample_weight) - HistGradientBoostingClassifier(...) 
- >>> gb.predict([[1, 0]]) - array([1]) - >>> gb.predict_proba([[1, 0]])[0, 1] - 0.99... + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` + * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` -As you can see, the `[1, 0]` is comfortably classified as `1` since the first -two samples are ignored due to their sample weights. +.. topic:: References -Implementation detail: taking sample weights into account amounts to -multiplying the gradients (and the hessians) by the sample weights. Note that -the binning stage (specifically the quantiles computation) does not take the -weights into account. + .. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. -.. _categorical_support_gbdt: + .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. -Categorical Features Support ----------------------------- + * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized + trees", Machine Learning, 63(1), 3-42, 2006. -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have native support for categorical -features: they can consider splits on non-ordered, categorical data. +.. _random_forest_feature_importance: -For datasets with categorical features, using the native categorical support -is often better than relying on one-hot encoding -(:class:`~sklearn.preprocessing.OneHotEncoder`), because one-hot encoding -requires more tree depth to achieve equivalent splits. It is also usually -better to rely on the native categorical support rather than to treat -categorical features as continuous (ordinal), which happens for ordinal-encoded -categorical data, since categories are nominal quantities where order does not -matter. +Feature importance evaluation +----------------------------- -To enable categorical support, a boolean mask can be passed to the -`categorical_features` parameter, indicating which feature is categorical. In -the following, the first feature will be treated as categorical and the -second feature as numerical:: +The relative rank (i.e. depth) of a feature used as a decision node in a +tree can be used to assess the relative importance of that feature with +respect to the predictability of the target variable. Features used at +the top of the tree contribute to the final prediction decision of a +larger fraction of the input samples. The **expected fraction of the +samples** they contribute to can thus be used as an estimate of the +**relative importance of the features**. In scikit-learn, the fraction of +samples a feature contributes to is combined with the decrease in impurity +from splitting them to create a normalized estimate of the predictive power +of that feature. - >>> gbdt = HistGradientBoostingClassifier(categorical_features=[True, False]) +By **averaging** the estimates of predictive ability over several randomized +trees one can **reduce the variance** of such an estimate and use it +for feature selection. This is known as the mean decrease in impurity, or MDI. +Refer to [L2014]_ for more information on MDI and feature importance +evaluation with Random Forests. -Equivalently, one can pass a list of integers indicating the indices of the -categorical features:: +.. warning:: - >>> gbdt = HistGradientBoostingClassifier(categorical_features=[0]) + The impurity-based feature importances computed on tree-based models suffer + from two flaws that can lead to misleading conclusions. 
First they are
+  computed on statistics derived from the training dataset and therefore **do
+  not necessarily inform us on which features are most important to make good
+  predictions on a held-out dataset**. Secondly, **they favor high cardinality
+  features**, that is, features with many unique values.
+  :ref:`permutation_importance` is an alternative to impurity-based feature
+  importance that does not suffer from these flaws. These two methods of
+  obtaining feature importance are explored in:
+  :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`.

-The cardinality of each categorical feature should be less than the `max_bins`
-parameter, and each categorical feature is expected to be encoded in
-`[0, max_bins - 1]`. To that end, it might be useful to pre-process the data
-with an :class:`~sklearn.preprocessing.OrdinalEncoder` as done in
-:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`.
+The following example shows a color-coded representation of the relative
+importances of each individual pixel for a face recognition task using
+an :class:`ExtraTreesClassifier` model.

-If there are missing values during training, the missing values will be
-treated as a proper category. If there are no missing values during training,
-then at prediction time, missing values are mapped to the child node that has
-the most samples (just like for continuous features). When predicting,
-categories that were not seen during fit time will be treated as missing
-values.
+.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_importances_faces_001.png
+   :target: ../auto_examples/ensemble/plot_forest_importances_faces.html
+   :align: center
+   :scale: 75

-**Split finding with categorical features**: The canonical way of considering
-categorical splits in a tree is to consider
-all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of
-categories. This can quickly become prohibitive when :math:`K` is large.
-Fortunately, since gradient boosting trees are always regression trees (even
-for classification problems), there exist a faster strategy that can yield
-equivalent splits. First, the categories of a feature are sorted according to
-the variance of the target, for each category `k`. Once the categories are
-sorted, one can consider *continuous partitions*, i.e. treat the categories
-as if they were ordered continuous values (see Fisher [Fisher1958]_ for a
-formal proof). As a result, only :math:`K - 1` splits need to be considered
-instead of :math:`2^{K - 1} - 1`. The initial sorting is a
-:math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of
-:math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`.
+In practice those estimates are stored as an attribute named
+``feature_importances_`` on the fitted model. This is an array with shape
+``(n_features,)`` whose values are positive and sum to 1.0. The higher
+the value, the more important the contribution of the matching feature
+to the prediction function.

 .. topic:: Examples:

-  * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`
-
-.. _monotonic_cst_gbdt:
-
-Monotonic Constraints
----------------------
+  * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py`
+  * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py`

-Depending on the problem at hand, you may have prior knowledge indicating
-that a given feature should in general have a positive (or negative) effect
-on the target value.
For example, all else being equal, a higher credit -score should increase the probability of getting approved for a loan. -Monotonic constraints allow you to incorporate such prior knowledge into the -model. +.. topic:: References -For a predictor :math:`F` with two features: + .. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to + Practice" <1407.7502>`, + PhD Thesis, U. of Liege, 2014. - - a **monotonic increase constraint** is a constraint of the form: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2) +.. _random_trees_embedding: - - a **monotonic decrease constraint** is a constraint of the form: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \geq F(x_1', x_2) +Totally Random Trees Embedding +------------------------------ -You can specify a monotonic constraint on each feature using the -`monotonic_cst` parameter. For each feature, a value of 0 indicates no -constraint, while 1 and -1 indicate a monotonic increase and -monotonic decrease constraint, respectively:: +:class:`RandomTreesEmbedding` implements an unsupervised transformation of the +data. Using a forest of completely random trees, :class:`RandomTreesEmbedding` +encodes the data by the indices of the leaves a data point ends up in. This +index is then encoded in a one-of-K manner, leading to a high dimensional, +sparse binary coding. +This coding can be computed very efficiently and can then be used as a basis +for other learning tasks. +The size and sparsity of the code can be influenced by choosing the number of +trees and the maximum depth per tree. For each tree in the ensemble, the coding +contains one entry of one. The size of the coding is at most ``n_estimators * 2 +** max_depth``, the maximum number of leaves in the forest. - >>> from sklearn.ensemble import HistGradientBoostingRegressor +As neighboring data points are more likely to lie within the same leaf of a +tree, the transformation performs an implicit, non-parametric density +estimation. - ... # monotonic increase, monotonic decrease, and no constraint on the 3 features - >>> gbdt = HistGradientBoostingRegressor(monotonic_cst=[1, -1, 0]) +.. topic:: Examples: -In a binary classification context, imposing a monotonic increase (decrease) constraint means that higher values of the feature are supposed -to have a positive (negative) effect on the probability of samples -to belong to the positive class. + * :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` -Nevertheless, monotonic constraints only marginally constrain feature effects on the output. -For instance, monotonic increase and decrease constraints cannot be used to enforce the -following modelling constraint: + * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear + dimensionality reduction techniques on handwritten digits. - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2') + * :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares + supervised and unsupervised tree based feature transformations. -Also, monotonic constraints are not supported for multiclass classification. +.. seealso:: -.. note:: - Since categories are unordered quantities, it is not possible to enforce - monotonic constraints on categorical features. + :ref:`manifold` techniques can also be useful to derive non-linear + representations of feature space, also these approaches focus also on + dimensionality reduction. -.. topic:: Examples: +.. 
_tree_ensemble_warm_start: - * :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py` +Fitting additional trees +------------------------ -.. _interaction_cst_hgbt: +RandomForest, Extra-Trees and :class:`RandomTreesEmbedding` estimators all support +``warm_start=True`` which allows you to add more trees to an already fitted model. -Interaction constraints ------------------------ +:: -A priori, the histogram gradient boosting trees are allowed to use any feature -to split a node into child nodes. This creates so called interactions between -features, i.e. usage of different features as split along a branch. Sometimes, -one wants to restrict the possible interactions, see [Mayer2022]_. This can be -done by the parameter ``interaction_cst``, where one can specify the indices -of features that are allowed to interact. -For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]`` -forbids all interactions. -The constraints ``[{0, 1}, {1, 2}]`` specifies two groups of possibly -interacting features. Features 0 and 1 may interact with each other, as well -as features 1 and 2. But note that features 0 and 2 are forbidden to interact. -The following depicts a tree and the possible splits of the tree: + >>> from sklearn.datasets import make_classification + >>> from sklearn.ensemble import RandomForestClassifier + + >>> X, y = make_classification(n_samples=100, random_state=1) + >>> clf = RandomForestClassifier(n_estimators=10) + >>> clf = clf.fit(X, y) # fit with 10 trees + >>> len(clf.estimators_) + 10 + >>> # set warm_start and increase num of estimators + >>> _ = clf.set_params(n_estimators=20, warm_start=True) + >>> _ = clf.fit(X, y) # fit additional 10 trees + >>> len(clf.estimators_) + 20 + +When ``random_state`` is also set, the internal random state is also preserved +between ``fit`` calls. This means that training a model once with ``n`` estimators is +the same as building the model iteratively via multiple ``fit`` calls, where the +final number of estimators is equal to ``n``. -.. code-block:: none +:: - 1 <- Both constraint groups could be applied from now on - / \ - 1 2 <- Left split still fulfills both constraint groups. - / \ / \ Right split at feature 2 has only group {1, 2} from now on. + >>> clf = RandomForestClassifier(n_estimators=20) # set `n_estimators` to 10 + 10 + >>> _ = clf.fit(X, y) # fit `estimators_` will be the same as `clf` above -LightGBM uses the same logic for overlapping groups. +Note that this differs from the usual behavior of :term:`random_state` in that it does +*not* result in the same result across different calls. -Note that features not listed in ``interaction_cst`` are automatically -assigned an interaction group for themselves. With again 3 features, this -means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``. +.. _bagging: -.. topic:: References +Bagging meta-estimator +====================== - .. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. - 2022. :doi:`Machine Learning Applications to Land and Structure Valuation - <10.3390/jrfm15050193>`. - Journal of Risk and Financial Management 15, no. 5: 193 +In ensemble algorithms, bagging methods form a class of algorithms which build +several instances of a black-box estimator on random subsets of the original +training set and then aggregate their individual predictions to form a final +prediction. 
These methods are used as a way to reduce the variance of a base +estimator (e.g., a decision tree), by introducing randomization into its +construction procedure and then making an ensemble out of it. In many cases, +bagging methods constitute a very simple way to improve with respect to a +single model, without making it necessary to adapt the underlying base +algorithm. As they provide a way to reduce overfitting, bagging methods work +best with strong and complex models (e.g., fully developed decision trees), in +contrast with boosting methods which usually work best with weak models (e.g., +shallow decision trees). -Low-level parallelism ---------------------- +Bagging methods come in many flavours but mostly differ from each other by the +way they draw random subsets of the training set: -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have implementations that use OpenMP -for parallelization through Cython. For more details on how to control the -number of threads, please refer to our :ref:`parallelism` notes. +* When random subsets of the dataset are drawn as random subsets of the + samples, then this algorithm is known as Pasting [B1999]_. -The following parts are parallelized: +* When samples are drawn with replacement, then the method is known as + Bagging [B1996]_. -- mapping samples from real values to integer-valued bins (finding the bin - thresholds is however sequential) -- building histograms is parallelized over features -- finding the best split point at a node is parallelized over features -- during fit, mapping samples into the left and right children is - parallelized over samples -- gradient and hessians computations are parallelized over samples -- predicting is parallelized over samples +* When random subsets of the dataset are drawn as random subsets of + the features, then the method is known as Random Subspaces [H1998]_. -Why it's faster ---------------- +* Finally, when base estimators are built on subsets of both samples and + features, then the method is known as Random Patches [LG2012]_. -The bottleneck of a gradient boosting procedure is building the decision -trees. Building a traditional decision tree (as in the other GBDTs -:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`) -requires sorting the samples at each node (for -each feature). Sorting is needed so that the potential gain of a split point -can be computed efficiently. Splitting a single node has thus a complexity -of :math:`\mathcal{O}(n_\text{features} \times n \log(n))` where :math:`n` -is the number of samples at the node. +In scikit-learn, bagging methods are offered as a unified +:class:`BaggingClassifier` meta-estimator (resp. :class:`BaggingRegressor`), +taking as input a user-specified estimator along with parameters +specifying the strategy to draw random subsets. In particular, ``max_samples`` +and ``max_features`` control the size of the subsets (in terms of samples and +features), while ``bootstrap`` and ``bootstrap_features`` control whether +samples and features are drawn with or without replacement. When using a subset +of the available samples the generalization accuracy can be estimated with the +out-of-bag samples by setting ``oob_score=True``. As an example, the +snippet below illustrates how to instantiate a bagging ensemble of +:class:`~sklearn.neighbors.KNeighborsClassifier` estimators, each built on random +subsets of 50% of the samples and 50% of the features. 
-:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor`, in contrast, do not require sorting the -feature values and instead use a data-structure called a histogram, where the -samples are implicitly ordered. Building a histogram has a -:math:`\mathcal{O}(n)` complexity, so the node splitting procedure has a -:math:`\mathcal{O}(n_\text{features} \times n)` complexity, much smaller -than the previous one. In addition, instead of considering :math:`n` split -points, we here consider only ``max_bins`` split points, which is much -smaller. + >>> from sklearn.ensemble import BaggingClassifier + >>> from sklearn.neighbors import KNeighborsClassifier + >>> bagging = BaggingClassifier(KNeighborsClassifier(), + ... max_samples=0.5, max_features=0.5) -In order to build histograms, the input data `X` needs to be binned into -integer-valued bins. This binning procedure does require sorting the feature -values, but it only happens once at the very beginning of the boosting process -(not at each node, like in :class:`GradientBoostingClassifier` and -:class:`GradientBoostingRegressor`). +.. topic:: Examples: -Finally, many parts of the implementation of -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` are parallelized. + * :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py` .. topic:: References - .. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree - Boosting System" <1603.02754>` + .. [B1999] L. Breiman, "Pasting small votes for classification in large + databases and on-line", Machine Learning, 36(1), 85-103, 1999. + + .. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2), + 123-140, 1996. + + .. [H1998] T. Ho, "The random subspace method for constructing decision + forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844, + 1998. + + .. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches", + Machine Learning and Knowledge Discovery in Databases, 346-361, 2012. - .. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient - BoostingDecision Tree" `_ - .. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" - `_ - Journal of the American Statistical Association, 53, 789-798. .. _voting_classifier: @@ -1408,8 +1488,28 @@ Vector Machine, a Decision Tree, and a K-nearest neighbor classifier:: :align: center :scale: 75% -Using the `VotingClassifier` with `GridSearchCV` ------------------------------------------------- +Usage +----- + +In order to predict the class labels based on the predicted +class-probabilities (scikit-learn estimators in the VotingClassifier +must support ``predict_proba`` method):: + + >>> eclf = VotingClassifier( + ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], + ... voting='soft' + ... ) + +Optionally, weights can be provided for the individual classifiers:: + + >>> eclf = VotingClassifier( + ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], + ... voting='soft', weights=[2,5,1] + ... 
) + +|details-start| +**Using the `VotingClassifier` with `GridSearchCV`** +|details-split| The :class:`VotingClassifier` can also be used together with :class:`~sklearn.model_selection.GridSearchCV` in order to tune the @@ -1429,24 +1529,7 @@ hyperparameters of the individual estimators:: >>> grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5) >>> grid = grid.fit(iris.data, iris.target) -Usage ------ - -In order to predict the class labels based on the predicted -class-probabilities (scikit-learn estimators in the VotingClassifier -must support ``predict_proba`` method):: - - >>> eclf = VotingClassifier( - ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], - ... voting='soft' - ... ) - -Optionally, weights can be provided for the individual classifiers:: - - >>> eclf = VotingClassifier( - ... estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], - ... voting='soft', weights=[2,5,1] - ... ) +|details-end| .. _voting_regressor: @@ -1609,3 +1692,92 @@ computationally expensive. .. [W1992] Wolpert, David H. "Stacked generalization." Neural networks 5.2 (1992): 241-259. + + + +.. _adaboost: + +AdaBoost +======== + +The module :mod:`sklearn.ensemble` includes the popular boosting algorithm +AdaBoost, introduced in 1995 by Freund and Schapire [FS1995]_. + +The core principle of AdaBoost is to fit a sequence of weak learners (i.e., +models that are only slightly better than random guessing, such as small +decision trees) on repeatedly modified versions of the data. The predictions +from all of them are then combined through a weighted majority vote (or sum) to +produce the final prediction. The data modifications at each so-called boosting +iteration consists of applying weights :math:`w_1`, :math:`w_2`, ..., :math:`w_N` +to each of the training samples. Initially, those weights are all set to +:math:`w_i = 1/N`, so that the first step simply trains a weak learner on the +original data. For each successive iteration, the sample weights are +individually modified and the learning algorithm is reapplied to the reweighted +data. At a given step, those training examples that were incorrectly predicted +by the boosted model induced at the previous step have their weights increased, +whereas the weights are decreased for those that were predicted correctly. As +iterations proceed, examples that are difficult to predict receive +ever-increasing influence. Each subsequent weak learner is thereby forced to +concentrate on the examples that are missed by the previous ones in the sequence +[HTF]_. + +.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_adaboost_multiclass_001.png + :target: ../auto_examples/ensemble/plot_adaboost_multiclass.html + :align: center + :scale: 75 + +AdaBoost can be used both for classification and regression problems: + +- For multi-class classification, :class:`AdaBoostClassifier` implements + AdaBoost.SAMME [ZZRH2009]_. + +- For regression, :class:`AdaBoostRegressor` implements AdaBoost.R2 [D1997]_. + +Usage +----- + +The following example shows how to fit an AdaBoost classifier with 100 weak +learners:: + + >>> from sklearn.model_selection import cross_val_score + >>> from sklearn.datasets import load_iris + >>> from sklearn.ensemble import AdaBoostClassifier + + >>> X, y = load_iris(return_X_y=True) + >>> clf = AdaBoostClassifier(n_estimators=100, algorithm="SAMME",) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.9... + +The number of weak learners is controlled by the parameter ``n_estimators``. 
The +``learning_rate`` parameter controls the contribution of the weak learners in +the final combination. By default, weak learners are decision stumps. Different +weak learners can be specified through the ``estimator`` parameter. +The main parameters to tune to obtain good results are ``n_estimators`` and +the complexity of the base estimators (e.g., its depth ``max_depth`` or +minimum required number of samples to consider a split ``min_samples_split``). + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance + of AdaBoost on a multi-class problem. + + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary + and decision function values for a non-linearly separable two-class problem + using AdaBoost-SAMME. + + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression + with the AdaBoost.R2 algorithm. + +.. topic:: References + + .. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of + On-Line Learning and an Application to Boosting", 1997. + + .. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", + 2009. + + .. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. + + .. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of + Statistical Learning Ed. 2", Springer, 2009. diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 5876000f9a1c1..7ac538a89849b 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -206,8 +206,9 @@ Note the use of a generator comprehension, which introduces laziness into the feature extraction: tokens are only processed on demand from the hasher. -Implementation details ----------------------- +|details-start| +**Implementation details** +|details-split| :class:`FeatureHasher` uses the signed 32-bit variant of MurmurHash3. As a result (and because of limitations in ``scipy.sparse``), @@ -223,6 +224,11 @@ Since a simple modulo is used to transform the hash function to a column index, it is advisable to use a power of two as the ``n_features`` parameter; otherwise the features will not be mapped evenly to the columns. +.. topic:: References: + + * `MurmurHash3 `_. + +|details-end| .. topic:: References: @@ -230,9 +236,6 @@ otherwise the features will not be mapped evenly to the columns. Josh Attenberg (2009). `Feature hashing for large scale multitask learning `_. Proc. ICML. - * `MurmurHash3 `_. - - .. _text_feature_extraction: Text feature extraction @@ -396,7 +399,7 @@ last document:: .. _stop_words: Using stop words -................ +---------------- Stop words are words like "and", "the", "him", which are presumed to be uninformative in representing the content of a text, and which may be @@ -426,6 +429,7 @@ identify and warn about some kinds of inconsistencies. `__. In *Proc. Workshop for NLP Open Source Software*. + .. _tfidf: Tf–idf term weighting @@ -490,6 +494,10 @@ class:: Again please see the :ref:`reference documentation ` for the details on all the parameters. +|details-start| +**Numeric example of a tf-idf matrix** +|details-split| + Let's take an example with the following counts. The first term is present 100% of the time hence not very interesting. 
The two other features only in less than 50% of the time hence probably more representative of the @@ -607,8 +615,9 @@ As usual the best way to adjust the feature extraction parameters is to use a cross-validated grid search, for instance by pipelining the feature extractor with a classifier: - * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +* :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +|details-end| Decoding text files ------------------- @@ -637,6 +646,10 @@ or ``"replace"``. See the documentation for the Python function ``bytes.decode`` for more details (type ``help(bytes.decode)`` at the Python prompt). +|details-start| +**Troubleshooting decoding text** +|details-split| + If you are having trouble decoding text, here are some things to try: - Find out what the actual encoding of the text is. The file might come @@ -690,6 +703,7 @@ About Unicode `_. .. _`ftfy`: https://github.com/LuminosoInsight/python-ftfy +|details-end| Applications and examples ------------------------- @@ -701,18 +715,18 @@ In particular in a **supervised setting** it can be successfully combined with fast and scalable linear models to train **document classifiers**, for instance: - * :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` +* :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` In an **unsupervised setting** it can be used to group similar documents together by applying clustering algorithms such as :ref:`k_means`: - * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` +* :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py` Finally it is possible to discover the main topics of a corpus by relaxing the hard assignment constraint of clustering, for instance by using :ref:`NMF`: - * :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` +* :ref:`sphx_glr_auto_examples_applications_plot_topics_extraction_with_nmf_lda.py` Limitations of the Bag of Words representation @@ -846,7 +860,7 @@ Note that the dimensionality does not affect the CPU training time of algorithms which operate on CSR matrices (``LinearSVC(dual=True)``, ``Perceptron``, ``SGDClassifier``, ``PassiveAggressive``) but it does for algorithms that work with CSC matrices (``LinearSVC(dual=False)``, ``Lasso()``, -etc). +etc.). Let's try again with the default setting:: @@ -870,8 +884,9 @@ The :class:`HashingVectorizer` also comes with the following limitations: model. A :class:`TfidfTransformer` can be appended to it in a pipeline if required. -Performing out-of-core scaling with HashingVectorizer ------------------------------------------------------- +|details-start| +**Performing out-of-core scaling with HashingVectorizer** +|details-split| An interesting development of using a :class:`HashingVectorizer` is the ability to perform `out-of-core`_ scaling. This means that we can learn from data that @@ -890,6 +905,8 @@ time is often limited by the CPU time one wants to spend on the task. For a full-fledged example of out-of-core scaling in a text classification task see :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. 
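+
+As a rough sketch only (not a replacement for the full example referenced
+above, and with a toy in-memory batch list standing in for data streamed from
+disk), out-of-core learning typically pairs the stateless
+:class:`HashingVectorizer` with an estimator that implements ``partial_fit``,
+such as :class:`~sklearn.linear_model.SGDClassifier`::
+
+  >>> from sklearn.feature_extraction.text import HashingVectorizer
+  >>> from sklearn.linear_model import SGDClassifier
+  >>> batches = [(["good movie", "bad plot"], [1, 0]),
+  ...            (["great cast", "boring script"], [1, 0])]
+  >>> vectorizer = HashingVectorizer(n_features=2 ** 18)
+  >>> clf = SGDClassifier(random_state=0)
+  >>> for docs, labels in batches:  # in practice, stream mini-batches from disk
+  ...     X_batch = vectorizer.transform(docs)  # stateless: no fitting required
+  ...     clf = clf.partial_fit(X_batch, labels, classes=[0, 1])
+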
+|details-end| + Customizing the vectorizer classes ---------------------------------- @@ -906,19 +923,19 @@ to the vectorizer constructor:: In particular we name: - * ``preprocessor``: a callable that takes an entire document as input (as a - single string), and returns a possibly transformed version of the document, - still as an entire string. This can be used to remove HTML tags, lowercase - the entire document, etc. +* ``preprocessor``: a callable that takes an entire document as input (as a + single string), and returns a possibly transformed version of the document, + still as an entire string. This can be used to remove HTML tags, lowercase + the entire document, etc. - * ``tokenizer``: a callable that takes the output from the preprocessor - and splits it into tokens, then returns a list of these. +* ``tokenizer``: a callable that takes the output from the preprocessor + and splits it into tokens, then returns a list of these. - * ``analyzer``: a callable that replaces the preprocessor and tokenizer. - The default analyzers all call the preprocessor and tokenizer, but custom - analyzers will skip this. N-gram extraction and stop word filtering take - place at the analyzer level, so a custom analyzer may have to reproduce - these steps. +* ``analyzer``: a callable that replaces the preprocessor and tokenizer. + The default analyzers all call the preprocessor and tokenizer, but custom + analyzers will skip this. N-gram extraction and stop word filtering take + place at the analyzer level, so a custom analyzer may have to reproduce + these steps. (Lucene users might recognize these names, but be aware that scikit-learn concepts may not map one-to-one onto Lucene concepts.) @@ -928,60 +945,66 @@ parameters it is possible to derive from the class and override the ``build_preprocessor``, ``build_tokenizer`` and ``build_analyzer`` factory methods instead of passing custom functions. +|details-start| +**Tips and tricks** +|details-split| + Some tips and tricks: - * If documents are pre-tokenized by an external package, then store them in - files (or strings) with the tokens separated by whitespace and pass - ``analyzer=str.split`` - * Fancy token-level analysis such as stemming, lemmatizing, compound - splitting, filtering based on part-of-speech, etc. are not included in the - scikit-learn codebase, but can be added by customizing either the - tokenizer or the analyzer. - Here's a ``CountVectorizer`` with a tokenizer and lemmatizer using - `NLTK `_:: - - >>> from nltk import word_tokenize # doctest: +SKIP - >>> from nltk.stem import WordNetLemmatizer # doctest: +SKIP - >>> class LemmaTokenizer: - ... def __init__(self): - ... self.wnl = WordNetLemmatizer() - ... def __call__(self, doc): - ... return [self.wnl.lemmatize(t) for t in word_tokenize(doc)] - ... - >>> vect = CountVectorizer(tokenizer=LemmaTokenizer()) # doctest: +SKIP - - (Note that this will not filter out punctuation.) - - - The following example will, for instance, transform some British spelling - to American spelling:: - - >>> import re - >>> def to_british(tokens): - ... for t in tokens: - ... t = re.sub(r"(...)our$", r"\1or", t) - ... t = re.sub(r"([bt])re$", r"\1er", t) - ... t = re.sub(r"([iy])s(e$|ing|ation)", r"\1z\2", t) - ... t = re.sub(r"ogue$", "og", t) - ... yield t - ... - >>> class CustomVectorizer(CountVectorizer): - ... def build_tokenizer(self): - ... tokenize = super().build_tokenizer() - ... return lambda doc: list(to_british(tokenize(doc))) - ... 
- >>> print(CustomVectorizer().build_analyzer()(u"color colour")) - [...'color', ...'color'] - - for other styles of preprocessing; examples include stemming, lemmatization, - or normalizing numerical tokens, with the latter illustrated in: - - * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py` +* If documents are pre-tokenized by an external package, then store them in + files (or strings) with the tokens separated by whitespace and pass + ``analyzer=str.split`` +* Fancy token-level analysis such as stemming, lemmatizing, compound + splitting, filtering based on part-of-speech, etc. are not included in the + scikit-learn codebase, but can be added by customizing either the + tokenizer or the analyzer. + Here's a ``CountVectorizer`` with a tokenizer and lemmatizer using + `NLTK `_:: + + >>> from nltk import word_tokenize # doctest: +SKIP + >>> from nltk.stem import WordNetLemmatizer # doctest: +SKIP + >>> class LemmaTokenizer: + ... def __init__(self): + ... self.wnl = WordNetLemmatizer() + ... def __call__(self, doc): + ... return [self.wnl.lemmatize(t) for t in word_tokenize(doc)] + ... + >>> vect = CountVectorizer(tokenizer=LemmaTokenizer()) # doctest: +SKIP + + (Note that this will not filter out punctuation.) + + + The following example will, for instance, transform some British spelling + to American spelling:: + + >>> import re + >>> def to_british(tokens): + ... for t in tokens: + ... t = re.sub(r"(...)our$", r"\1or", t) + ... t = re.sub(r"([bt])re$", r"\1er", t) + ... t = re.sub(r"([iy])s(e$|ing|ation)", r"\1z\2", t) + ... t = re.sub(r"ogue$", "og", t) + ... yield t + ... + >>> class CustomVectorizer(CountVectorizer): + ... def build_tokenizer(self): + ... tokenize = super().build_tokenizer() + ... return lambda doc: list(to_british(tokenize(doc))) + ... + >>> print(CustomVectorizer().build_analyzer()(u"color colour")) + [...'color', ...'color'] + + for other styles of preprocessing; examples include stemming, lemmatization, + or normalizing numerical tokens, with the latter illustrated in: + + * :ref:`sphx_glr_auto_examples_bicluster_plot_bicluster_newsgroups.py` Customizing the vectorizer can also be useful when handling Asian languages that do not use an explicit word separator such as whitespace. +|details-end| + .. _image_feature_extraction: Image feature extraction @@ -1033,7 +1056,7 @@ on overlapping areas:: The :class:`PatchExtractor` class works in the same way as :func:`extract_patches_2d`, only it supports multiple images as input. It is -implemented as an estimator, so it can be used in pipelines. See:: +implemented as a scikit-learn transformer, so it can be used in pipelines. See:: >>> five_images = np.arange(5 * 4 * 4 * 3).reshape(5, 4, 4, 3) >>> patches = image.PatchExtractor(patch_size=(2, 2)).transform(five_images) diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst index f8a0562aa5498..1b5ce57b0074f 100644 --- a/doc/modules/feature_selection.rst +++ b/doc/modules/feature_selection.rst @@ -57,29 +57,29 @@ univariate statistical tests. It can be seen as a preprocessing step to an estimator. 
Scikit-learn exposes feature selection routines as objects that implement the ``transform`` method: - * :class:`SelectKBest` removes all but the :math:`k` highest scoring features +* :class:`SelectKBest` removes all but the :math:`k` highest scoring features - * :class:`SelectPercentile` removes all but a user-specified highest scoring - percentage of features +* :class:`SelectPercentile` removes all but a user-specified highest scoring + percentage of features - * using common univariate statistical tests for each feature: - false positive rate :class:`SelectFpr`, false discovery rate - :class:`SelectFdr`, or family wise error :class:`SelectFwe`. +* using common univariate statistical tests for each feature: + false positive rate :class:`SelectFpr`, false discovery rate + :class:`SelectFdr`, or family wise error :class:`SelectFwe`. - * :class:`GenericUnivariateSelect` allows to perform univariate feature - selection with a configurable strategy. This allows to select the best - univariate selection strategy with hyper-parameter search estimator. +* :class:`GenericUnivariateSelect` allows to perform univariate feature + selection with a configurable strategy. This allows to select the best + univariate selection strategy with hyper-parameter search estimator. -For instance, we can perform a :math:`\chi^2` test to the samples -to retrieve only the two best features as follows: +For instance, we can use a F-test to retrieve the two +best features for a dataset as follows: >>> from sklearn.datasets import load_iris >>> from sklearn.feature_selection import SelectKBest - >>> from sklearn.feature_selection import chi2 + >>> from sklearn.feature_selection import f_classif >>> X, y = load_iris(return_X_y=True) >>> X.shape (150, 4) - >>> X_new = SelectKBest(chi2, k=2).fit_transform(X, y) + >>> X_new = SelectKBest(f_classif, k=2).fit_transform(X, y) >>> X_new.shape (150, 2) @@ -87,14 +87,15 @@ These objects take as input a scoring function that returns univariate scores and p-values (or only scores for :class:`SelectKBest` and :class:`SelectPercentile`): - * For regression: :func:`r_regression`, :func:`f_regression`, :func:`mutual_info_regression` +* For regression: :func:`r_regression`, :func:`f_regression`, :func:`mutual_info_regression` - * For classification: :func:`chi2`, :func:`f_classif`, :func:`mutual_info_classif` +* For classification: :func:`chi2`, :func:`f_classif`, :func:`mutual_info_classif` The methods based on F-test estimate the degree of linear dependency between two random variables. On the other hand, mutual information methods can capture any kind of statistical dependency, but being nonparametric, they require more -samples for accurate estimation. +samples for accurate estimation. Note that the :math:`\chi^2`-test should only be +applied to non-negative features, such as frequencies. .. topic:: Feature selection with sparse data @@ -107,6 +108,12 @@ samples for accurate estimation. Beware not to use a regression scoring function with a classification problem, you will get useless results. +.. note:: + + The :class:`SelectPercentile` and :class:`SelectKBest` support unsupervised + feature selection as well. One needs to provide a `score_func` where `y=None`. + The `score_func` should use internally `X` to compute the scores. + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_feature_selection_plot_feature_selection.py` @@ -129,7 +136,13 @@ repeated on the pruned set until the desired number of features to select is eventually reached. 
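+
+For instance, the sketch below (an illustration with arbitrary choices of
+estimator and number of features) keeps the two features of the iris dataset
+that are ranked highest by a logistic regression model::
+
+  >>> from sklearn.datasets import load_iris
+  >>> from sklearn.feature_selection import RFE
+  >>> from sklearn.linear_model import LogisticRegression
+  >>> X, y = load_iris(return_X_y=True)
+  >>> selector = RFE(LogisticRegression(max_iter=1000), n_features_to_select=2)
+  >>> X_new = selector.fit_transform(X, y)  # prunes one feature per step until 2 remain
+  >>> X_new.shape
+  (150, 2)
+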
:class:`RFECV` performs RFE in a cross-validation loop to find the optimal
-number of features.
+number of features. In more detail, the number of features selected is tuned
+automatically by fitting an :class:`RFE` selector on the different
+cross-validation splits (provided by the `cv` parameter). The performance
+of the :class:`RFE` selector is evaluated using `scorer` for different numbers
+of selected features and aggregated together. Finally, the scores are averaged
+across folds and the number of features selected is set to the number of
+features that maximizes the cross-validation score.

 .. topic:: Examples:

@@ -200,30 +213,36 @@ alpha parameter, the fewer features selected.

 .. _compressive_sensing:

-.. topic:: **L1-recovery and compressive sensing**
-
-   For a good choice of alpha, the :ref:`lasso` can fully recover the
-   exact set of non-zero variables using only few observations, provided
-   certain specific conditions are met. In particular, the number of
-   samples should be "sufficiently large", or L1 models will perform at
-   random, where "sufficiently large" depends on the number of non-zero
-   coefficients, the logarithm of the number of features, the amount of
-   noise, the smallest absolute value of non-zero coefficients, and the
-   structure of the design matrix X. In addition, the design matrix must
-   display certain specific properties, such as not being too correlated.
-
-   There is no general rule to select an alpha parameter for recovery of
-   non-zero coefficients. It can by set by cross-validation
-   (:class:`LassoCV` or :class:`LassoLarsCV`), though this may lead to
-   under-penalized models: including a small number of non-relevant
-   variables is not detrimental to prediction score. BIC
-   (:class:`LassoLarsIC`) tends, on the opposite, to set high values of
-   alpha.
-
-   **Reference** Richard G. Baraniuk "Compressive Sensing", IEEE Signal
+|details-start|
+**L1-recovery and compressive sensing**
+|details-split|
+
+For a good choice of alpha, the :ref:`lasso` can fully recover the
+exact set of non-zero variables using only a few observations, provided
+certain specific conditions are met. In particular, the number of
+samples should be "sufficiently large", or L1 models will perform at
+random, where "sufficiently large" depends on the number of non-zero
+coefficients, the logarithm of the number of features, the amount of
+noise, the smallest absolute value of non-zero coefficients, and the
+structure of the design matrix X. In addition, the design matrix must
+display certain specific properties, such as not being too correlated.
+
+There is no general rule to select an alpha parameter for recovery of
+non-zero coefficients. It can be set by cross-validation
+(:class:`~sklearn.linear_model.LassoCV` or
+:class:`~sklearn.linear_model.LassoLarsCV`), though this may lead to
+under-penalized models: including a small number of non-relevant variables
+is not detrimental to prediction score. BIC
+(:class:`~sklearn.linear_model.LassoLarsIC`) tends, on the contrary, to set
+high values of alpha.
+
+.. topic:: Reference
+
+   Richard G. Baraniuk "Compressive Sensing", IEEE Signal
     Processing Magazine [120] July 2007
     http://users.isr.ist.utl.pt/~aguiar/CS_notes.pdf

+|details-end|

 Tree-based feature selection
 ----------------------------

@@ -280,6 +299,10 @@ instead of starting with no features and greedily adding
 features, we start with *all* the features and greedily *remove* features from
 the set. The `direction` parameter controls whether forward or backward SFS is
 used.
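+
+As a small illustrative sketch (the estimator and the number of selected
+features below are arbitrary choices, not a recommendation), forward selection
+of two features could look as follows::
+
+  >>> from sklearn.datasets import load_iris
+  >>> from sklearn.feature_selection import SequentialFeatureSelector
+  >>> from sklearn.neighbors import KNeighborsClassifier
+  >>> X, y = load_iris(return_X_y=True)
+  >>> sfs = SequentialFeatureSelector(KNeighborsClassifier(n_neighbors=3),
+  ...                                 n_features_to_select=2, direction="forward")
+  >>> X_new = sfs.fit_transform(X, y)  # greedily adds the best feature at each step
+  >>> X_new.shape
+  (150, 2)
+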
+|details-start| +**Detail on Sequential Feature Selection** +|details-split| + In general, forward and backward selection do not yield equivalent results. Also, one may be much faster than the other depending on the requested number of selected features: if we have 10 features and ask for 7 selected features, @@ -297,16 +320,18 @@ cross-validation requires fitting `m * k` models, while :class:`~sklearn.feature_selection.SelectFromModel` always just does a single fit and requires no iterations. -.. topic:: Examples - - * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` - -.. topic:: References: +.. topic:: Reference .. [sfs] Ferri et al, `Comparative study of techniques for large-scale feature selection `_. +|details-end| + +.. topic:: Examples + + * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` + Feature selection as part of a pipeline ======================================= diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 1f40ef26b5fd4..58e56a557ed73 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -1,5 +1,3 @@ - - .. _gaussian_process: ================== @@ -8,30 +6,30 @@ Gaussian Processes .. currentmodule:: sklearn.gaussian_process -**Gaussian Processes (GP)** are a generic supervised learning method designed +**Gaussian Processes (GP)** are a nonparametric supervised learning method used to solve *regression* and *probabilistic classification* problems. The advantages of Gaussian processes are: - - The prediction interpolates the observations (at least for regular - kernels). +- The prediction interpolates the observations (at least for regular + kernels). - - The prediction is probabilistic (Gaussian) so that one can compute - empirical confidence intervals and decide based on those if one should - refit (online fitting, adaptive fitting) the prediction in some - region of interest. +- The prediction is probabilistic (Gaussian) so that one can compute + empirical confidence intervals and decide based on those if one should + refit (online fitting, adaptive fitting) the prediction in some + region of interest. - - Versatile: different :ref:`kernels - ` can be specified. Common kernels are provided, but - it is also possible to specify custom kernels. +- Versatile: different :ref:`kernels + ` can be specified. Common kernels are provided, but + it is also possible to specify custom kernels. The disadvantages of Gaussian processes include: - - They are not sparse, i.e., they use the whole samples/features information to - perform the prediction. +- Our implementation is not sparse, i.e., they use the whole samples/features + information to perform the prediction. - - They lose efficiency in high dimensional spaces -- namely when the number - of features exceeds a few dozens. +- They lose efficiency in high dimensional spaces -- namely when the number + of features exceeds a few dozens. .. _gpr: @@ -42,31 +40,44 @@ Gaussian Process Regression (GPR) .. currentmodule:: sklearn.gaussian_process The :class:`GaussianProcessRegressor` implements Gaussian processes (GP) for -regression purposes. For this, the prior of the GP needs to be specified. The -prior mean is assumed to be constant and zero (for ``normalize_y=False``) or the -training data's mean (for ``normalize_y=True``). The prior's -covariance is specified by passing a :ref:`kernel ` object. 
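To make the probabilistic nature of the prediction concrete, here is a hedged
sketch (the toy one-dimensional data are purely illustrative)::

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF

    X_train = np.array([[1.0], [3.0], [5.0], [6.0]])
    y_train = np.sin(X_train).ravel()

    gpr = GaussianProcessRegressor(kernel=RBF(length_scale=1.0)).fit(X_train, y_train)

    # The posterior mean and standard deviation at new points; the standard
    # deviation can be used to build empirical confidence intervals.
    X_test = np.linspace(0, 7, 20).reshape(-1, 1)
    y_mean, y_std = gpr.predict(X_test, return_std=True)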
The -hyperparameters of the kernel are optimized during fitting of -GaussianProcessRegressor by maximizing the log-marginal-likelihood (LML) based -on the passed ``optimizer``. As the LML may have multiple local optima, the -optimizer can be started repeatedly by specifying ``n_restarts_optimizer``. The -first run is always conducted starting from the initial hyperparameter values -of the kernel; subsequent runs are conducted from hyperparameter values -that have been chosen randomly from the range of allowed values. -If the initial hyperparameters should be kept fixed, `None` can be passed as -optimizer. +regression purposes. For this, the prior of the GP needs to be specified. GP +will combine this prior and the likelihood function based on training samples. +It allows to give a probabilistic approach to prediction by giving the mean and +standard deviation as output when predicting. -The noise level in the targets can be specified by passing it via the -parameter ``alpha``, either globally as a scalar or per datapoint. -Note that a moderate noise level can also be helpful for dealing with numeric -issues during fitting as it is effectively implemented as Tikhonov -regularization, i.e., by adding it to the diagonal of the kernel matrix. An -alternative to specifying the noise level explicitly is to include a -WhiteKernel component into the kernel, which can estimate the global noise -level from the data (see example below). +.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_targets_002.png + :target: ../auto_examples/gaussian_process/plot_gpr_noisy_targets.html + :align: center + +The prior mean is assumed to be constant and zero (for `normalize_y=False`) or +the training data's mean (for `normalize_y=True`). The prior's covariance is +specified by passing a :ref:`kernel ` object. The hyperparameters +of the kernel are optimized when fitting the :class:`GaussianProcessRegressor` +by maximizing the log-marginal-likelihood (LML) based on the passed +`optimizer`. As the LML may have multiple local optima, the optimizer can be +started repeatedly by specifying `n_restarts_optimizer`. The first run is +always conducted starting from the initial hyperparameter values of the kernel; +subsequent runs are conducted from hyperparameter values that have been chosen +randomly from the range of allowed values. If the initial hyperparameters +should be kept fixed, `None` can be passed as optimizer. + +The noise level in the targets can be specified by passing it via the parameter +`alpha`, either globally as a scalar or per datapoint. Note that a moderate +noise level can also be helpful for dealing with numeric instabilities during +fitting as it is effectively implemented as Tikhonov regularization, i.e., by +adding it to the diagonal of the kernel matrix. An alternative to specifying +the noise level explicitly is to include a +:class:`~sklearn.gaussian_process.kernels.WhiteKernel` component into the +kernel, which can estimate the global noise level from the data (see example +below). The figure below shows the effect of noisy target handled by setting +the parameter `alpha`. + +.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_targets_003.png + :target: ../auto_examples/gaussian_process/plot_gpr_noisy_targets.html + :align: center The implementation is based on Algorithm 2.1 of [RW2006]_. 
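The two ways of handling noisy targets described above can be sketched as
follows (the noise levels and data are placeholders, not recommendations)::

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, WhiteKernel

    rng = np.random.RandomState(0)
    X = rng.uniform(0, 5, 30).reshape(-1, 1)
    y = np.sin(X).ravel() + rng.normal(scale=0.1, size=X.shape[0])

    # Option 1: a fixed, user-specified noise level through ``alpha``.
    gpr_alpha = GaussianProcessRegressor(kernel=RBF(), alpha=0.1**2).fit(X, y)

    # Option 2: let a WhiteKernel component estimate the noise level from the
    # data, restarting the optimizer to avoid poor local optima of the LML.
    kernel = RBF() + WhiteKernel(noise_level=1e-1)
    gpr_white = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=5).fit(X, y)
    print(gpr_white.kernel_)  # optimized kernel, including the fitted noise level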
In addition to -the API of standard scikit-learn estimators, GaussianProcessRegressor: +the API of standard scikit-learn estimators, :class:`GaussianProcessRegressor`: * allows prediction without prior fitting (based on the GP prior) @@ -77,152 +88,12 @@ the API of standard scikit-learn estimators, GaussianProcessRegressor: externally for other ways of selecting hyperparameters, e.g., via Markov chain Monte Carlo. +.. topic:: Examples -GPR examples -============ - -GPR with noise-level estimation -------------------------------- -This example illustrates that GPR with a sum-kernel including a WhiteKernel can -estimate the noise level of data. An illustration of the -log-marginal-likelihood (LML) landscape shows that there exist two local -maxima of LML. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_003.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - -The first corresponds to a model with a high noise level and a -large length scale, which explains all variations in the data by noise. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_004.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - -The second one has a smaller noise level and shorter length scale, which explains -most of the variation by the noise-free functional relationship. The second -model has a higher likelihood; however, depending on the initial value for the -hyperparameters, the gradient-based optimization might also converge to the -high-noise solution. It is thus important to repeat the optimization several -times for different initializations. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_005.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - - -Comparison of GPR and Kernel Ridge Regression ---------------------------------------------- - -Both kernel ridge regression (KRR) and GPR learn -a target function by employing internally the "kernel trick". KRR learns a -linear function in the space induced by the respective kernel which corresponds -to a non-linear function in the original space. The linear function in the -kernel space is chosen based on the mean-squared error loss with -ridge regularization. GPR uses the kernel to define the covariance of -a prior distribution over the target functions and uses the observed training -data to define a likelihood function. Based on Bayes theorem, a (Gaussian) -posterior distribution over target functions is defined, whose mean is used -for prediction. - -A major difference is that GPR can choose the kernel's hyperparameters based -on gradient-ascent on the marginal likelihood function while KRR needs to -perform a grid search on a cross-validated loss function (mean-squared error -loss). A further difference is that GPR learns a generative, probabilistic -model of the target function and can thus provide meaningful confidence -intervals and posterior samples along with the predictions while KRR only -provides predictions. - -The following figure illustrates both methods on an artificial dataset, which -consists of a sinusoidal target function and strong noise. The figure compares -the learned model of KRR and GPR based on a ExpSineSquared kernel, which is -suited for learning periodic functions. The kernel's hyperparameters control -the smoothness (length_scale) and periodicity of the kernel (periodicity). 
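A hedged sketch mirroring this comparison (the data generation and the
hyperparameter grid are assumptions made only for illustration)::

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import ExpSineSquared, WhiteKernel
    from sklearn.kernel_ridge import KernelRidge
    from sklearn.model_selection import GridSearchCV

    rng = np.random.RandomState(0)
    X = 15 * rng.rand(100, 1)
    y = np.sin(X).ravel() + 0.5 * rng.randn(100)

    # KRR: hyperparameters selected by grid search on a cross-validated loss.
    krr = GridSearchCV(
        KernelRidge(kernel=ExpSineSquared()),
        param_grid={
            "alpha": [1e0, 1e-1, 1e-2],
            "kernel__length_scale": [0.1, 1.0, 10.0],
            "kernel__periodicity": [3.0, 6.0, 9.0],
        },
    ).fit(X, y)

    # GPR: kernel hyperparameters and the noise level selected by maximizing
    # the log-marginal-likelihood via gradient ascent.
    gpr = GaussianProcessRegressor(
        kernel=ExpSineSquared() + WhiteKernel(noise_level=1e-1)
    ).fit(X, y)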
-Moreover, the noise level -of the data is learned explicitly by GPR by an additional WhiteKernel component -in the kernel and by the regularization parameter alpha of KRR. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_compare_gpr_krr_005.png - :target: ../auto_examples/gaussian_process/plot_compare_gpr_krr.html - :align: center - -The figure shows that both methods learn reasonable models of the target -function. GPR correctly identifies the periodicity of the function to be -roughly :math:`2*\pi` (6.28), while KRR chooses the doubled periodicity -:math:`4*\pi` . Besides -that, GPR provides reasonable confidence bounds on the prediction which are not -available for KRR. A major difference between the two methods is the time -required for fitting and predicting: while fitting KRR is fast in principle, -the grid-search for hyperparameter optimization scales exponentially with the -number of hyperparameters ("curse of dimensionality"). The gradient-based -optimization of the parameters in GPR does not suffer from this exponential -scaling and is thus considerably faster on this example with 3-dimensional -hyperparameter space. The time for predicting is similar; however, generating -the variance of the predictive distribution of GPR takes considerably longer -than just predicting the mean. - -GPR on Mauna Loa CO2 data -------------------------- - -This example is based on Section 5.4.3 of [RW2006]_. -It illustrates an example of complex kernel engineering and -hyperparameter optimization using gradient ascent on the -log-marginal-likelihood. The data consists of the monthly average atmospheric -CO2 concentrations (in parts per million by volume (ppmv)) collected at the -Mauna Loa Observatory in Hawaii, between 1958 and 1997. The objective is to -model the CO2 concentration as a function of the time t. - -The kernel is composed of several terms that are responsible for explaining -different properties of the signal: - -- a long term, smooth rising trend is to be explained by an RBF kernel. The - RBF kernel with a large length-scale enforces this component to be smooth; - it is not enforced that the trend is rising which leaves this choice to the - GP. The specific length-scale and the amplitude are free hyperparameters. - -- a seasonal component, which is to be explained by the periodic - ExpSineSquared kernel with a fixed periodicity of 1 year. The length-scale - of this periodic component, controlling its smoothness, is a free parameter. - In order to allow decaying away from exact periodicity, the product with an - RBF kernel is taken. The length-scale of this RBF component controls the - decay time and is a further free parameter. - -- smaller, medium term irregularities are to be explained by a - RationalQuadratic kernel component, whose length-scale and alpha parameter, - which determines the diffuseness of the length-scales, are to be determined. - According to [RW2006]_, these irregularities can better be explained by - a RationalQuadratic than an RBF kernel component, probably because it can - accommodate several length-scales. - -- a "noise" term, consisting of an RBF kernel contribution, which shall - explain the correlated noise components such as local weather phenomena, - and a WhiteKernel contribution for the white noise. The relative amplitudes - and the RBF's length scale are further free parameters. 
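The composite kernel described above can be written down directly with kernel
arithmetic; the numbers below are placeholder initial values, not the fitted
hyperparameters::

    from sklearn.gaussian_process.kernels import (
        RBF,
        ExpSineSquared,
        RationalQuadratic,
        WhiteKernel,
    )

    long_term_trend = 50.0**2 * RBF(length_scale=50.0)
    seasonal = 2.0**2 * RBF(length_scale=100.0) * ExpSineSquared(
        length_scale=1.0, periodicity=1.0
    )
    irregularities = 0.5**2 * RationalQuadratic(length_scale=1.0, alpha=1.0)
    noise = 0.1**2 * RBF(length_scale=0.1) + WhiteKernel(noise_level=0.1**2)

    co2_kernel = long_term_trend + seasonal + irregularities + noise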
- -Maximizing the log-marginal-likelihood after subtracting the target's mean -yields the following kernel with an LML of -83.214: - -:: - - 34.4**2 * RBF(length_scale=41.8) - + 3.27**2 * RBF(length_scale=180) * ExpSineSquared(length_scale=1.44, - periodicity=1) - + 0.446**2 * RationalQuadratic(alpha=17.7, length_scale=0.957) - + 0.197**2 * RBF(length_scale=0.138) + WhiteKernel(noise_level=0.0336) - -Thus, most of the target signal (34.4ppm) is explained by a long-term rising -trend (length-scale 41.8 years). The periodic component has an amplitude of -3.27ppm, a decay time of 180 years and a length-scale of 1.44. The long decay -time indicates that we have a locally very close to periodic seasonal -component. The correlated noise has an amplitude of 0.197ppm with a length -scale of 0.138 years and a white-noise contribution of 0.197ppm. Thus, the -overall noise level is very small, indicating that the data can be very well -explained by the model. The figure shows also that the model makes very -confident predictions until around 2015 - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_co2_003.png - :target: ../auto_examples/gaussian_process/plot_gpr_co2.html - :align: center + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy_targets.py` + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy.py` + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_compare_gpr_krr.py` + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_co2.py` .. _gpc: @@ -368,8 +239,10 @@ also invariant to rotations in the input space. For more details, we refer to Chapter 4 of [RW2006]_. For guidance on how to best combine different kernels, we refer to [Duv2014]_. -Gaussian Process Kernel API ---------------------------- +|details-start| +**Gaussian Process Kernel API** +|details-split| + The main usage of a :class:`Kernel` is to compute the GP's covariance between datapoints. For this, the method ``__call__`` of the kernel can be called. This method can either be used to compute the "auto-covariance" of all pairs of @@ -404,15 +277,17 @@ The specification of each hyperparameter is stored in the form of an instance of hyperparameter with name "x" must have the attributes self.x and self.x_bounds. The abstract base class for all kernels is :class:`Kernel`. Kernel implements a -similar interface as :class:`Estimator`, providing the methods ``get_params()``, -``set_params()``, and ``clone()``. This allows setting kernel values also via -meta-estimators such as :class:`Pipeline` or :class:`GridSearch`. Note that due to the nested +similar interface as :class:`~sklearn.base.BaseEstimator`, providing the +methods ``get_params()``, ``set_params()``, and ``clone()``. This allows +setting kernel values also via meta-estimators such as +:class:`~sklearn.pipeline.Pipeline` or +:class:`~sklearn.model_selection.GridSearchCV`. Note that due to the nested structure of kernels (by applying kernel operators, see below), the names of -kernel parameters might become relatively complicated. In general, for a -binary kernel operator, parameters of the left operand are prefixed with ``k1__`` -and parameters of the right operand with ``k2__``. An additional convenience -method is ``clone_with_theta(theta)``, which returns a cloned version of the -kernel but with the hyperparameters set to ``theta``. An illustrative example: +kernel parameters might become relatively complicated. 
In general, for a binary +kernel operator, parameters of the left operand are prefixed with ``k1__`` and +parameters of the right operand with ``k2__``. An additional convenience method +is ``clone_with_theta(theta)``, which returns a cloned version of the kernel +but with the hyperparameters set to ``theta``. An illustrative example: >>> from sklearn.gaussian_process.kernels import ConstantKernel, RBF >>> kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=0.5, length_scale_bounds=(0.0, 10.0)) + RBF(length_scale=2.0, length_scale_bounds=(0.0, 10.0)) @@ -450,6 +325,7 @@ only isotropic distances. The parameter ``gamma`` is considered to be a hyperparameter and may be optimized. The other kernel parameters are set directly at initialization and are kept fixed. +|details-end| Basic kernels ------------- @@ -510,7 +386,13 @@ Matérn kernel ------------- The :class:`Matern` kernel is a stationary kernel and a generalization of the :class:`RBF` kernel. It has an additional parameter :math:`\nu` which controls -the smoothness of the resulting function. It is parameterized by a length-scale parameter :math:`l>0`, which can either be a scalar (isotropic variant of the kernel) or a vector with the same number of dimensions as the inputs :math:`x` (anisotropic variant of the kernel). The kernel is given by: +the smoothness of the resulting function. It is parameterized by a length-scale parameter :math:`l>0`, which can either be a scalar (isotropic variant of the kernel) or a vector with the same number of dimensions as the inputs :math:`x` (anisotropic variant of the kernel). + +|details-start| +**Mathematical implementation of Matérn kernel** +|details-split| + +The kernel is given by: .. math:: @@ -540,6 +422,9 @@ differentiable (as assumed by the RBF kernel) but at least once (:math:`\nu = The flexibility of controlling the smoothness of the learned function via :math:`\nu` allows adapting to the properties of the true underlying functional relation. + +|details-end| + The prior and posterior of a GP resulting from a Matérn kernel are shown in the following figure: diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index e4cc62b7773f3..01c5a5c72ee52 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -81,7 +81,7 @@ evaluated and the best combination is retained. of Grid Search coupling parameters from a text documents feature extractor (n-gram count vectorizer and TF-IDF transformer) with a classifier (here a linear SVM trained with SGD with either elastic - net or L2 penalty) using a :class:`pipeline.Pipeline` instance. + net or L2 penalty) using a :class:`~sklearn.pipeline.Pipeline` instance. - See :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` for an example of Grid Search within a cross validation loop on the iris @@ -128,32 +128,29 @@ discrete choices (which will be sampled uniformly) can be specified:: This example uses the ``scipy.stats`` module, which contains many useful distributions for sampling parameters, such as ``expon``, ``gamma``, -``uniform`` or ``randint``. +``uniform``, ``loguniform`` or ``randint``. In principle, any function can be passed that provides a ``rvs`` (random variate sample) method to sample a value. A call to the ``rvs`` function should provide independent random samples from possible parameter values on consecutive calls. - .. warning:: +.. 
warning:: - The distributions in ``scipy.stats`` prior to version scipy 0.16 - do not allow specifying a random state. Instead, they use the global - numpy random state, that can be seeded via ``np.random.seed`` or set - using ``np.random.set_state``. However, beginning scikit-learn 0.18, - the :mod:`sklearn.model_selection` module sets the random state provided - by the user if scipy >= 0.16 is also available. + The distributions in ``scipy.stats`` prior to version scipy 0.16 + do not allow specifying a random state. Instead, they use the global + numpy random state, that can be seeded via ``np.random.seed`` or set + using ``np.random.set_state``. However, beginning scikit-learn 0.18, + the :mod:`sklearn.model_selection` module sets the random state provided + by the user if scipy >= 0.16 is also available. For continuous parameters, such as ``C`` above, it is important to specify a continuous distribution to take full advantage of the randomization. This way, increasing ``n_iter`` will always lead to a finer search. -A continuous log-uniform random variable is available through -:class:`~sklearn.utils.fixes.loguniform`. This is a continuous version of -log-spaced parameters. For example to specify ``C`` above, ``loguniform(1, -100)`` can be used instead of ``[1, 10, 100]`` or ``np.logspace(0, 2, -num=1000)``. This is an alias to `scipy.stats.loguniform -`_. +A continuous log-uniform random variable is the continuous version of +a log-spaced parameter. For example to specify the equivalent of ``C`` from above, +``loguniform(1, 100)`` can be used instead of ``[1, 10, 100]``. Mirroring the example above in grid search, we can specify a continuous random variable that is log-uniformly distributed between ``1e0`` and ``1e3``:: @@ -433,7 +430,7 @@ ways: :class:`HalvingGridSearchCV`; - by setting `n_candidates='exhaust'`. -Both options are mutally exclusive: using `min_resources='exhaust'` requires +Both options are mutually exclusive: using `min_resources='exhaust'` requires knowing the number of candidates, and symmetrically `n_candidates='exhaust'` requires knowing `min_resources`. @@ -615,7 +612,7 @@ Here, ```` is the parameter name of the nested estimator, in this case ``estimator``. If the meta-estimator is constructed as a collection of estimators as in `pipeline.Pipeline`, then ```` refers to the name of the estimator, -see :ref:`pipeline_nested_parameters`. In practice, there can be several +see :ref:`pipeline_nested_parameters`. In practice, there can be several levels of nesting:: >>> from sklearn.pipeline import Pipeline @@ -660,8 +657,8 @@ Robustness to failure Some parameter settings may result in a failure to ``fit`` one or more folds of the data. By default, this will cause the entire search to fail, even if some parameter settings could be fully evaluated. Setting ``error_score=0`` -(or `=np.NaN`) will make the procedure robust to such failure, issuing a -warning and setting the score for that fold to 0 (or `NaN`), but completing +(or `=np.nan`) will make the procedure robust to such failure, issuing a +warning and setting the score for that fold to 0 (or `nan`), but completing the search. .. _alternative_cv: diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst index f608915f6e6d7..f5879cbffc0a5 100644 --- a/doc/modules/impute.rst +++ b/doc/modules/impute.rst @@ -22,9 +22,9 @@ Univariate vs. 
Multivariate Imputation One type of imputation algorithm is univariate, which imputes values in the i-th feature dimension using only non-missing values in that feature dimension -(e.g. :class:`impute.SimpleImputer`). By contrast, multivariate imputation +(e.g. :class:`SimpleImputer`). By contrast, multivariate imputation algorithms use the entire set of available feature dimensions to estimate the -missing values (e.g. :class:`impute.IterativeImputer`). +missing values (e.g. :class:`IterativeImputer`). .. _single_imputer: @@ -87,6 +87,8 @@ string values or pandas categoricals when using the ``'most_frequent'`` or ['a' 'y'] ['b' 'y']] +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + .. _iterative_imputer: @@ -176,9 +178,9 @@ cannot be achieved by a single call to ``transform``. References ---------- -.. [1] Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice: Multivariate +.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). "mice: Multivariate Imputation by Chained Equations in R". Journal of Statistical Software 45: - 1-67. + 1-67. `_ .. [2] Roderick J A Little and Donald B Rubin (1986). "Statistical Analysis with Missing Data". John Wiley & Sons, Inc., New York, NY, USA. @@ -190,19 +192,20 @@ Nearest neighbors imputation The :class:`KNNImputer` class provides imputation for filling in missing values using the k-Nearest Neighbors approach. By default, a euclidean distance metric -that supports missing values, :func:`~sklearn.metrics.nan_euclidean_distances`, -is used to find the nearest neighbors. Each missing feature is imputed using -values from ``n_neighbors`` nearest neighbors that have a value for the -feature. The feature of the neighbors are averaged uniformly or weighted by -distance to each neighbor. If a sample has more than one feature missing, then -the neighbors for that sample can be different depending on the particular -feature being imputed. When the number of available neighbors is less than -`n_neighbors` and there are no defined distances to the training set, the -training set average for that feature is used during imputation. If there is at -least one neighbor with a defined distance, the weighted or unweighted average -of the remaining neighbors will be used during imputation. If a feature is -always missing in training, it is removed during `transform`. For more -information on the methodology, see ref. [OL2001]_. +that supports missing values, +:func:`~sklearn.metrics.pairwise.nan_euclidean_distances`, is used to find the +nearest neighbors. Each missing feature is imputed using values from +``n_neighbors`` nearest neighbors that have a value for the feature. The +feature of the neighbors are averaged uniformly or weighted by distance to each +neighbor. If a sample has more than one feature missing, then the neighbors for +that sample can be different depending on the particular feature being imputed. +When the number of available neighbors is less than `n_neighbors` and there are +no defined distances to the training set, the training set average for that +feature is used during imputation. If there is at least one neighbor with a +defined distance, the weighted or unweighted average of the remaining neighbors +will be used during imputation. If a feature is always missing in training, it +is removed during `transform`. For more information on the methodology, see +ref. [OL2001]_. 
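As a small, hedged illustration of this nan-aware distance (the array values
are arbitrary)::

    import numpy as np
    from sklearn.metrics.pairwise import nan_euclidean_distances

    X = np.array([[1.0, np.nan, 3.0],
                  [2.0, 4.0, 6.0]])

    # Distances are computed on the coordinates present in both samples and
    # rescaled to compensate for the missing ones.
    print(nan_euclidean_distances(X, X))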
The following snippet demonstrates how to replace missing values, encoded as ``np.nan``, using the mean feature value of the two nearest @@ -219,15 +222,18 @@ neighbors of samples with missing values:: [5.5, 6. , 5. ], [8. , 8. , 7. ]]) +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + .. topic:: References - .. [OL2001] Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, + .. [OL2001] `Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17 no. 6, 2001 Pages 520-525. + `_ -Keeping the number of features constants -======================================== +Keeping the number of features constant +======================================= By default, the scikit-learn imputers will drop fully empty features, i.e. columns containing only missing values. For instance:: @@ -303,10 +309,12 @@ whether or not they contain missing values:: >>> indicator.features_ array([0, 1, 2, 3]) -When using the :class:`MissingIndicator` in a :class:`Pipeline`, be sure to use -the :class:`FeatureUnion` or :class:`ColumnTransformer` to add the indicator -features to the regular features. First we obtain the `iris` dataset, and add -some missing values to it. +When using the :class:`MissingIndicator` in a +:class:`~sklearn.pipeline.Pipeline`, be sure to use the +:class:`~sklearn.pipeline.FeatureUnion` or +:class:`~sklearn.compose.ColumnTransformer` to add the indicator features to +the regular features. First we obtain the `iris` dataset, and add some missing +values to it. >>> from sklearn.datasets import load_iris >>> from sklearn.impute import SimpleImputer, MissingIndicator @@ -319,9 +327,9 @@ some missing values to it. >>> X_train, X_test, y_train, _ = train_test_split(X, y, test_size=100, ... random_state=0) -Now we create a :class:`FeatureUnion`. All features will be imputed using -:class:`SimpleImputer`, in order to enable classifiers to work with this data. -Additionally, it adds the indicator variables from +Now we create a :class:`~sklearn.pipeline.FeatureUnion`. All features will be +imputed using :class:`SimpleImputer`, in order to enable classifiers to work +with this data. Additionally, it adds the indicator variables from :class:`MissingIndicator`. >>> transformer = FeatureUnion( @@ -334,8 +342,8 @@ Additionally, it adds the indicator variables from (100, 8) Of course, we cannot use the transformer to make any predictions. We should -wrap this in a :class:`Pipeline` with a classifier (e.g., a -:class:`DecisionTreeClassifier`) to be able to make predictions. +wrap this in a :class:`~sklearn.pipeline.Pipeline` with a classifier (e.g., a +:class:`~sklearn.tree.DecisionTreeClassifier`) to be able to make predictions. >>> clf = make_pipeline(transformer, DecisionTreeClassifier()) >>> clf = clf.fit(X_train, y_train) diff --git a/doc/modules/isotonic.rst b/doc/modules/isotonic.rst index 8967ef18afcb3..6cfdc1669de5d 100644 --- a/doc/modules/isotonic.rst +++ b/doc/modules/isotonic.rst @@ -9,10 +9,10 @@ Isotonic regression The class :class:`IsotonicRegression` fits a non-decreasing real function to 1-dimensional data. It solves the following problem: - minimize :math:`\sum_i w_i (y_i - \hat{y}_i)^2` - - subject to :math:`\hat{y}_i \le \hat{y}_j` whenever :math:`X_i \le X_j`, +.. 
math:: + \min \sum_i w_i (y_i - \hat{y}_i)^2 +subject to :math:`\hat{y}_i \le \hat{y}_j` whenever :math:`X_i \le X_j`, where the weights :math:`w_i` are strictly positive, and both `X` and `y` are arbitrary real quantities. @@ -31,3 +31,7 @@ thus form a function that is piecewise linear: .. figure:: ../auto_examples/miscellaneous/images/sphx_glr_plot_isotonic_regression_001.png :target: ../auto_examples/miscellaneous/plot_isotonic_regression.html :align: center + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_miscellaneous_plot_isotonic_regression.py` diff --git a/doc/modules/kernel_approximation.rst b/doc/modules/kernel_approximation.rst index 2a192d5f4273a..0c67c36178e3b 100644 --- a/doc/modules/kernel_approximation.rst +++ b/doc/modules/kernel_approximation.rst @@ -35,13 +35,65 @@ is advisable to compare results against exact kernel methods when possible. Nystroem Method for Kernel Approximation ---------------------------------------- -The Nystroem method, as implemented in :class:`Nystroem` is a general method -for low-rank approximations of kernels. It achieves this by essentially subsampling -the data on which the kernel is evaluated. -By default :class:`Nystroem` uses the ``rbf`` kernel, but it can use any -kernel function or a precomputed kernel matrix. -The number of samples used - which is also the dimensionality of the features computed - -is given by the parameter ``n_components``. +The Nystroem method, as implemented in :class:`Nystroem` is a general method for +reduced rank approximations of kernels. It achieves this by subsampling without +replacement rows/columns of the data on which the kernel is evaluated. While the +computational complexity of the exact method is +:math:`\mathcal{O}(n^3_{\text{samples}})`, the complexity of the approximation +is :math:`\mathcal{O}(n^2_{\text{components}} \cdot n_{\text{samples}})`, where +one can set :math:`n_{\text{components}} \ll n_{\text{samples}}` without a +significative decrease in performance [WS2001]_. + +We can construct the eigendecomposition of the kernel matrix :math:`K`, based +on the features of the data, and then split it into sampled and unsampled data +points. + +.. math:: + + K = U \Lambda U^T + = \begin{bmatrix} U_1 \\ U_2\end{bmatrix} \Lambda \begin{bmatrix} U_1 \\ U_2 \end{bmatrix}^T + = \begin{bmatrix} U_1 \Lambda U_1^T & U_1 \Lambda U_2^T \\ U_2 \Lambda U_1^T & U_2 \Lambda U_2^T \end{bmatrix} + \equiv \begin{bmatrix} K_{11} & K_{12} \\ K_{21} & K_{22} \end{bmatrix} + +where: + +* :math:`U` is orthonormal +* :math:`\Lambda` is diagonal matrix of eigenvalues +* :math:`U_1` is orthonormal matrix of samples that were chosen +* :math:`U_2` is orthonormal matrix of samples that were not chosen + +Given that :math:`U_1 \Lambda U_1^T` can be obtained by orthonormalization of +the matrix :math:`K_{11}`, and :math:`U_2 \Lambda U_1^T` can be evaluated (as +well as its transpose), the only remaining term to elucidate is +:math:`U_2 \Lambda U_2^T`. To do this we can express it in terms of the already +evaluated matrices: + +.. 
math:: + + \begin{align} U_2 \Lambda U_2^T &= \left(K_{21} U_1 \Lambda^{-1}\right) \Lambda \left(K_{21} U_1 \Lambda^{-1}\right)^T + \\&= K_{21} U_1 (\Lambda^{-1} \Lambda) \Lambda^{-1} U_1^T K_{21}^T + \\&= K_{21} U_1 \Lambda^{-1} U_1^T K_{21}^T + \\&= K_{21} K_{11}^{-1} K_{21}^T + \\&= \left( K_{21} K_{11}^{-\frac12} \right) \left( K_{21} K_{11}^{-\frac12} \right)^T + .\end{align} + +During ``fit``, the class :class:`Nystroem` evaluates the basis :math:`U_1`, and +computes the normalization constant, :math:`K_{11}^{-\frac12}`. Later, during +``transform``, the kernel matrix is determined between the basis (given by the +`components_` attribute) and the new data points, ``X``. This matrix is then +multiplied by the ``normalization_`` matrix for the final result. + +By default :class:`Nystroem` uses the ``rbf`` kernel, but it can use any kernel +function or a precomputed kernel matrix. The number of samples used - which is +also the dimensionality of the features computed - is given by the parameter +``n_components``. + +.. topic:: Examples: + + * See the example entitled + :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`, + that shows an efficient machine learning pipeline that uses a + :class:`Nystroem` kernel. .. _rbf_kernel_approx: @@ -108,7 +160,7 @@ The additive chi squared kernel as used here is given by k(x, y) = \sum_i \frac{2x_iy_i}{x_i+y_i} -This is not exactly the same as :func:`sklearn.metrics.additive_chi2_kernel`. +This is not exactly the same as :func:`sklearn.metrics.pairwise.additive_chi2_kernel`. The authors of [VZ2010]_ prefer the version above as it is always positive definite. Since the kernel is additive, it is possible to treat all components @@ -163,8 +215,8 @@ function given by: where: - * ``x``, ``y`` are the input vectors - * ``d`` is the kernel degree +* ``x``, ``y`` are the input vectors +* ``d`` is the kernel degree Intuitively, the feature space of the polynomial kernel of degree `d` consists of all possible degree-`d` products among input features, which enables @@ -233,13 +285,16 @@ or store training examples. .. topic:: References: + .. [WS2001] `"Using the Nyström method to speed up kernel machines" + `_ + Williams, C.K.I.; Seeger, M. - 2001. .. [RR2007] `"Random features for large-scale kernel machines" `_ Rahimi, A. and Recht, B. - Advances in neural information processing 2007, .. [LS2010] `"Random Fourier approximations for skewed multiplicative histogram kernels" `_ Li, F., Ionescu, C., and Sminchisescu, C. - - Pattern Recognition, DAGM 2010, Lecture Notes in Computer Science. + - Pattern Recognition, DAGM 2010, Lecture Notes in Computer Science. .. [VZ2010] `"Efficient additive kernels via explicit feature maps" `_ Vedaldi, A. and Zisserman, A. - Computer Vision and Pattern Recognition 2010 @@ -250,7 +305,7 @@ or store training examples. <10.1145/2487575.2487591>` Pham, N., & Pagh, R. - 2013 .. [CCF2002] `"Finding frequent items in data streams" - `_ + `_ Charikar, M., Chen, K., & Farach-Colton - 2002 .. [WIKICS] `"Wikipedia: Count sketch" `_ diff --git a/doc/modules/kernel_ridge.rst b/doc/modules/kernel_ridge.rst index 286e9d4ac5322..5d25ce71f5ea1 100644 --- a/doc/modules/kernel_ridge.rst +++ b/doc/modules/kernel_ridge.rst @@ -55,6 +55,9 @@ dense model. :target: ../auto_examples/miscellaneous/plot_kernel_ridge_regression.html :align: center +.. topic:: Examples + + * :ref:`sphx_glr_auto_examples_miscellaneous_plot_kernel_ridge_regression.py` .. 
topic:: References: diff --git a/doc/modules/lda_qda.rst b/doc/modules/lda_qda.rst index 02b6c88cb7001..850a848fe3f73 100644 --- a/doc/modules/lda_qda.rst +++ b/doc/modules/lda_qda.rst @@ -137,7 +137,7 @@ Mathematical formulation of LDA dimensionality reduction First note that the K means :math:`\mu_k` are vectors in :math:`\mathcal{R}^d`, and they lie in an affine subspace :math:`H` of dimension at most :math:`K - 1` (2 points lie on a line, 3 points lie on a -plane, etc). +plane, etc.). As mentioned above, we can interpret LDA as assigning :math:`x` to the class whose mean :math:`\mu_k` is the closest in terms of Mahalanobis distance, @@ -190,7 +190,7 @@ matrix. The shrunk Ledoit and Wolf estimator of covariance may not always be the best choice. For example if the distribution of the data is normally distributed, the -Oracle Shrinkage Approximating estimator :class:`sklearn.covariance.OAS` +Oracle Approximating Shrinkage estimator :class:`sklearn.covariance.OAS` yields a smaller Mean Squared Error than the one given by Ledoit and Wolf's formula used with shrinkage="auto". In LDA, the data are assumed to be gaussian conditionally to the class. If these assumptions hold, using LDA with diff --git a/doc/modules/learning_curve.rst b/doc/modules/learning_curve.rst index 0ce64063d4cd9..3d458a1a67416 100644 --- a/doc/modules/learning_curve.rst +++ b/doc/modules/learning_curve.rst @@ -71,7 +71,7 @@ The function :func:`validation_curve` can help in this case:: >>> import numpy as np >>> from sklearn.model_selection import validation_curve >>> from sklearn.datasets import load_iris - >>> from sklearn.linear_model import Ridge + >>> from sklearn.svm import SVC >>> np.random.seed(0) >>> X, y = load_iris(return_X_y=True) @@ -80,30 +80,50 @@ The function :func:`validation_curve` can help in this case:: >>> X, y = X[indices], y[indices] >>> train_scores, valid_scores = validation_curve( - ... Ridge(), X, y, param_name="alpha", param_range=np.logspace(-7, 3, 3), - ... cv=5) + ... SVC(kernel="linear"), X, y, param_name="C", param_range=np.logspace(-7, 3, 3), + ... ) >>> train_scores - array([[0.93..., 0.94..., 0.92..., 0.91..., 0.92...], - [0.93..., 0.94..., 0.92..., 0.91..., 0.92...], - [0.51..., 0.52..., 0.49..., 0.47..., 0.49...]]) + array([[0.90..., 0.94..., 0.91..., 0.89..., 0.92...], + [0.9... , 0.92..., 0.93..., 0.92..., 0.93...], + [0.97..., 1... , 0.98..., 0.97..., 0.99...]]) >>> valid_scores - array([[0.90..., 0.84..., 0.94..., 0.96..., 0.93...], - [0.90..., 0.84..., 0.94..., 0.96..., 0.93...], - [0.46..., 0.25..., 0.50..., 0.49..., 0.52...]]) + array([[0.9..., 0.9... , 0.9... , 0.96..., 0.9... ], + [0.9..., 0.83..., 0.96..., 0.96..., 0.93...], + [1.... , 0.93..., 1.... , 1.... , 0.9... ]]) + +If you intend to plot the validation curves only, the class +:class:`~sklearn.model_selection.ValidationCurveDisplay` is more direct than +using matplotlib manually on the results of a call to :func:`validation_curve`. +You can use the method +:meth:`~sklearn.model_selection.ValidationCurveDisplay.from_estimator` similarly +to :func:`validation_curve` to generate and plot the validation curve: + +.. 
plot:: + :context: close-figs + :align: center + + from sklearn.datasets import load_iris + from sklearn.model_selection import ValidationCurveDisplay + from sklearn.svm import SVC + from sklearn.utils import shuffle + X, y = load_iris(return_X_y=True) + X, y = shuffle(X, y, random_state=0) + ValidationCurveDisplay.from_estimator( + SVC(kernel="linear"), X, y, param_name="C", param_range=np.logspace(-7, 3, 10) + ) If the training score and the validation score are both low, the estimator will be underfitting. If the training score is high and the validation score is low, the estimator is overfitting and otherwise it is working very well. A low training score and a high validation score is usually not possible. Underfitting, overfitting, and a working model are shown in the in the plot below where we vary -the parameter :math:`\gamma` of an SVM on the digits dataset. +the parameter `gamma` of an SVM with an RBF kernel on the digits dataset. .. figure:: ../auto_examples/model_selection/images/sphx_glr_plot_validation_curve_001.png :target: ../auto_examples/model_selection/plot_validation_curve.html :align: center :scale: 50% - .. _learning_curve: Learning curve diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e8541d82d5fb3..275ee01eb022f 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -37,7 +37,7 @@ solves a problem of the form: :align: center :scale: 50% -:class:`LinearRegression` will take in its ``fit`` method arrays X, y +:class:`LinearRegression` will take in its ``fit`` method arrays ``X``, ``y`` and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: @@ -114,7 +114,7 @@ of shrinkage and thus the coefficients become more robust to collinearity. As with other linear models, :class:`Ridge` will take in its ``fit`` method -arrays X, y and will store the coefficients :math:`w` of the linear model in +arrays ``X``, ``y`` and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: >>> from sklearn import linear_model @@ -174,9 +174,9 @@ a linear kernel. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_linear_model_plot_ridge_path.py` - * :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` - * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` + * :ref:`sphx_glr_auto_examples_linear_model_plot_ridge_path.py` + * :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` + * :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py` Ridge Complexity ---------------- @@ -193,9 +193,14 @@ This method has the same order of complexity as Setting the regularization parameter: leave-one-out Cross-Validation -------------------------------------------------------------------- -:class:`RidgeCV` implements ridge regression with built-in -cross-validation of the alpha parameter. The object works in the same way -as GridSearchCV except that it defaults to Leave-One-Out Cross-Validation:: +:class:`RidgeCV` and :class:`RidgeClassifierCV` implement ridge +regression/classification with built-in cross-validation of the alpha parameter. +They work in the same way as :class:`~sklearn.model_selection.GridSearchCV` except +that it defaults to efficient Leave-One-Out :term:`cross-validation`. +When using the default :term:`cross-validation`, alpha cannot be 0 due to the +formulation used to calculate Leave-One-Out error. See [RL2007]_ for details. 
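Conceptually, this default behaviour resembles an explicit leave-one-out grid
search over ``alpha``; the sketch below is only an illustration (the data and
the grid of ``alpha`` values are arbitrary) and is much slower than the
built-in procedure::

    import numpy as np
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import GridSearchCV, LeaveOneOut

    X, y = [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0]], [0.0, 0.1, 1.0]
    grid = GridSearchCV(
        Ridge(),
        {"alpha": np.logspace(-6, 6, 13)},
        cv=LeaveOneOut(),
        scoring="neg_mean_squared_error",
    ).fit(X, y)
    print(grid.best_params_)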
+ +Usage example:: >>> import numpy as np >>> from sklearn import linear_model @@ -211,13 +216,13 @@ cross-validation with :class:`~sklearn.model_selection.GridSearchCV`, for example `cv=10` for 10-fold cross-validation, rather than Leave-One-Out Cross-Validation. -.. topic:: References +.. topic:: References: - * "Notes on Regularized Least Squares", Rifkin & Lippert (`technical report - `_, - `course slides - `_). + .. [RL2007] "Notes on Regularized Least Squares", Rifkin & Lippert (`technical report + `_, + `course slides + `_). .. _lasso: @@ -270,20 +275,23 @@ computes the coefficients along the full path of possible values. thus be used to perform feature selection, as detailed in :ref:`l1_feature_selection`. +|details-start| +**References** +|details-split| + The following two references explain the iterations used in the coordinate descent solver of scikit-learn, as well as the duality gap computation used for convergence control. -.. topic:: References - - * "Regularization Path For Generalized linear Models by Coordinate Descent", - Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper - `__). - * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," - S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, - in IEEE Journal of Selected Topics in Signal Processing, 2007 - (`Paper `__) +* "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `__). +* "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `__) +|details-end| Setting regularization parameter -------------------------------- @@ -340,13 +348,25 @@ the problem is badly conditioned (e.g. more features than samples). :align: center :scale: 50% +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py` + * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars_ic.py` + .. _aic_bic: -**Mathematical details** +AIC and BIC criteria +^^^^^^^^^^^^^^^^^^^^ The definition of AIC (and thus BIC) might differ in the literature. In this section, we give more information regarding the criterion computed in -scikit-learn. The AIC criterion is defined as: +scikit-learn. + +|details-start| +**Mathematical details** +|details-split| + +The AIC criterion is defined as: .. math:: AIC = -2 \log(\hat{L}) + 2 d @@ -394,22 +414,19 @@ where :math:`p` is the number of features and :math:`\hat{y}_i` is the predicted target using an ordinary least squares regression. Note, that this formula is valid only when `n_samples > n_features`. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_model_selection.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_lars_ic.py` - -.. topic:: References +.. topic:: References: .. [12] :arxiv:`Zou, Hui, Trevor Hastie, and Robert Tibshirani. - "On the degrees of freedom of the lasso." - The Annals of Statistics 35.5 (2007): 2173-2192. - <0712.0881.pdf>` + "On the degrees of freedom of the lasso." + The Annals of Statistics 35.5 (2007): 2173-2192. + <0712.0881.pdf>` .. [13] :doi:`Cherkassky, Vladimir, and Yunqian Ma. - "Comparison of model selection for regression." - Neural computation 15.7 (2003): 1691-1714. - <10.1162/089976603321891864>` + "Comparison of model selection for regression." + Neural computation 15.7 (2003): 1691-1714. 
+ <10.1162/089976603321891864>` + +|details-end| Comparison with the regularization parameter of SVM ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -451,6 +468,10 @@ the MultiTaskLasso are full columns. * :ref:`sphx_glr_auto_examples_linear_model_plot_multi_task_lasso_support.py` +|details-start| +**Mathematical details** +|details-split| + Mathematically, it consists of a linear model trained with a mixed :math:`\ell_1` :math:`\ell_2`-norm for regularization. The objective function to minimize is: @@ -468,6 +489,7 @@ and :math:`\ell_1` :math:`\ell_2` reads The implementation in the class :class:`MultiTaskLasso` uses coordinate descent as the algorithm to fit the coefficients. +|details-end| .. _elastic_net: @@ -508,20 +530,25 @@ The class :class:`ElasticNetCV` can be used to set the parameters * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py` + * :ref:`sphx_glr_auto_examples_linear_model_plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py` + +|details-start| +**References** +|details-split| The following two references explain the iterations used in the coordinate descent solver of scikit-learn, as well as the duality gap computation used for convergence control. -.. topic:: References +* "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `__). +* "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `__) - * "Regularization Path For Generalized linear Models by Coordinate Descent", - Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper - `__). - * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," - S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, - in IEEE Journal of Selected Topics in Signal Processing, 2007 - (`Paper `__) +|details-end| .. _multi_task_elastic_net: @@ -563,30 +590,30 @@ between the features. The advantages of LARS are: - - It is numerically efficient in contexts where the number of features - is significantly greater than the number of samples. +- It is numerically efficient in contexts where the number of features + is significantly greater than the number of samples. - - It is computationally just as fast as forward selection and has - the same order of complexity as ordinary least squares. +- It is computationally just as fast as forward selection and has + the same order of complexity as ordinary least squares. - - It produces a full piecewise linear solution path, which is - useful in cross-validation or similar attempts to tune the model. +- It produces a full piecewise linear solution path, which is + useful in cross-validation or similar attempts to tune the model. - - If two features are almost equally correlated with the target, - then their coefficients should increase at approximately the same - rate. The algorithm thus behaves as intuition would expect, and - also is more stable. +- If two features are almost equally correlated with the target, + then their coefficients should increase at approximately the same + rate. The algorithm thus behaves as intuition would expect, and + also is more stable. - - It is easily modified to produce solutions for other estimators, - like the Lasso. 
+- It is easily modified to produce solutions for other estimators, + like the Lasso. The disadvantages of the LARS method include: - - Because LARS is based upon an iterative refitting of the - residuals, it would appear to be especially sensitive to the - effects of noise. This problem is discussed in detail by Weisberg - in the discussion section of the Efron et al. (2004) Annals of - Statistics article. +- Because LARS is based upon an iterative refitting of the + residuals, it would appear to be especially sensitive to the + effects of noise. This problem is discussed in detail by Weisberg + in the discussion section of the Efron et al. (2004) Annals of + Statistics article. The LARS model can be used via the estimator :class:`Lars`, or its low-level implementation :func:`lars_path` or :func:`lars_path_gram`. @@ -623,8 +650,9 @@ the regularization parameter almost for free, thus a common operation is to retrieve the path with one of the functions :func:`lars_path` or :func:`lars_path_gram`. -Mathematical formulation ------------------------- +|details-start| +**Mathematical formulation** +|details-split| The algorithm is similar to forward stepwise regression, but instead of including features at each step, the estimated coefficients are @@ -643,6 +671,7 @@ column is always zero. `_ by Hastie et al. +|details-end| .. _omp: @@ -657,7 +686,7 @@ orthogonal matching pursuit can approximate the optimum solution vector with a fixed number of non-zero elements: .. math:: - \underset{w}{\operatorname{arg\,min\,}} ||y - Xw||_2^2 \text{ subject to } ||w||_0 \leq n_{\text{nonzero\_coefs}} + \underset{w}{\operatorname{arg\,min\,}} ||y - Xw||_2^2 \text{ subject to } ||w||_0 \leq n_{\text{nonzero_coefs}} Alternatively, orthogonal matching pursuit can target a specific error instead of a specific number of non-zero coefficients. This can be expressed as: @@ -677,14 +706,17 @@ previously chosen dictionary elements. * :ref:`sphx_glr_auto_examples_linear_model_plot_omp.py` -.. topic:: References: +|details-start| +**References** +|details-split| - * https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf +* https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf - * `Matching pursuits with time-frequency dictionaries - `_, - S. G. Mallat, Z. Zhang, +* `Matching pursuits with time-frequency dictionaries + `_, + S. G. Mallat, Z. Zhang, +|details-end| .. _bayesian_regression: @@ -707,29 +739,33 @@ variable to be estimated from the data. To obtain a fully probabilistic model, the output :math:`y` is assumed to be Gaussian distributed around :math:`X w`: -.. math:: p(y|X,w,\alpha) = \mathcal{N}(y|X w,\alpha) +.. math:: p(y|X,w,\alpha) = \mathcal{N}(y|X w,\alpha^{-1}) where :math:`\alpha` is again treated as a random variable that is to be estimated from the data. The advantages of Bayesian Regression are: - - It adapts to the data at hand. +- It adapts to the data at hand. - - It can be used to include regularization parameters in the - estimation procedure. +- It can be used to include regularization parameters in the + estimation procedure. The disadvantages of Bayesian regression include: - - Inference of the model can be time consuming. +- Inference of the model can be time consuming. -.. topic:: References +|details-start| +**References** +|details-split| - * A good introduction to Bayesian methods is given in C. Bishop: Pattern - Recognition and Machine learning +* A good introduction to Bayesian methods is given in C. 
Bishop: Pattern + Recognition and Machine learning - * Original Algorithm is detailed in the book `Bayesian learning for neural - networks` by Radford M. Neal +* Original Algorithm is detailed in the book `Bayesian learning for neural + networks` by Radford M. Neal + +|details-end| .. _bayesian_ridge_regression: @@ -790,13 +826,17 @@ is more robust to ill-posed problems. * :ref:`sphx_glr_auto_examples_linear_model_plot_bayesian_ridge_curvefit.py` -.. topic:: References: +|details-start| +**References** +|details-split| + +* Section 3.3 in Christopher M. Bishop: Pattern Recognition and Machine Learning, 2006 - * Section 3.3 in Christopher M. Bishop: Pattern Recognition and Machine Learning, 2006 +* David J. C. MacKay, `Bayesian Interpolation `_, 1992. - * David J. C. MacKay, `Bayesian Interpolation `_, 1992. +* Michael E. Tipping, `Sparse Bayesian Learning and the Relevance Vector Machine `_, 2001. - * Michael E. Tipping, `Sparse Bayesian Learning and the Relevance Vector Machine `_, 2001. +|details-end| .. _automatic_relevance_determination: @@ -832,16 +872,16 @@ Ridge Regression`_, see the example below. * :ref:`sphx_glr_auto_examples_linear_model_plot_ard.py` -.. topic:: References: - .. [1] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 7.2.1 +.. topic:: References: - .. [2] David Wipf and Srikantan Nagarajan: `A New View of Automatic Relevance Determination `_ + .. [1] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 7.2.1 - .. [3] Michael E. Tipping: `Sparse Bayesian Learning and the Relevance Vector Machine `_ + .. [2] David Wipf and Srikantan Nagarajan: `A New View of Automatic Relevance Determination `_ - .. [4] Tristan Fletcher: `Relevance Vector Machines Explained `_ + .. [3] Michael E. Tipping: `Sparse Bayesian Learning and the Relevance Vector Machine `_ + .. [4] Tristan Fletcher: `Relevance Vector Machines Explained `_ .. _Logistic_regression: @@ -878,6 +918,18 @@ regularization. implemented in scikit-learn, so it expects a categorical target, making the Logistic Regression a classifier. +.. topic:: Examples + + * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_l1_l2_sparsity.py` + + * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py` + + * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py` + + * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_20newsgroups.py` + + * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_mnist.py` + Binary Case ----------- @@ -889,14 +941,24 @@ the probability of the positive class :math:`P(y_i=1|X_i)` as .. math:: \hat{p}(X_i) = \operatorname{expit}(X_i w + w_0) = \frac{1}{1 + \exp(-X_i w - w_0)}. + As an optimization problem, binary class logistic regression with regularization term :math:`r(w)` minimizes the following cost function: -.. math:: \min_{w} C \sum_{i=1}^n \left(-y_i \log(\hat{p}(X_i)) - (1 - y_i) \log(1 - \hat{p}(X_i))\right) + r(w). +.. math:: + :name: regularized-logistic-loss + + \min_{w} \frac{1}{S}\sum_{i=1}^n s_i + \left(-y_i \log(\hat{p}(X_i)) - (1 - y_i) \log(1 - \hat{p}(X_i))\right) + + \frac{r(w)}{S C}\,, +where :math:`{s_i}` corresponds to the weights assigned by the user to a +specific training sample (the vector :math:`s` is formed by element-wise +multiplication of the class weights and sample weights), +and the sum :math:`S = \sum_{i=1}^n s_i`. 
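To connect this objective to the estimator's parameters, a brief, hedged
sketch (the synthetic data are only for illustration)::

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X = rng.randn(100, 3)
    y = (X[:, 0] + 0.5 * X[:, 1] > 0).astype(int)

    # ``C`` is the inverse regularization strength appearing in the objective
    # above, and ``penalty`` selects the regularization term r(w).
    clf = LogisticRegression(penalty="l2", C=1.0).fit(X, y)

    # predict_proba returns [1 - p(X_i), p(X_i)], where p(X_i) is the expit
    # of the linear decision function.
    proba = clf.predict_proba(X[:5])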
-We currently provide four choices for the regularization term :math:`r(w)` via +We currently provide four choices for the regularization term :math:`r(w)` via the `penalty` argument: +----------------+-------------------------------------------------+ @@ -916,6 +978,11 @@ controls the strength of :math:`\ell_1` regularization vs. :math:`\ell_2` regularization. Elastic-Net is equivalent to :math:`\ell_1` when :math:`\rho = 1` and equivalent to :math:`\ell_2` when :math:`\rho=0`. +Note that the scale of the class weights and the sample weights will influence +the optimization problem. For instance, multiplying the sample weights by a +constant :math:`b>0` is equivalent to multiplying the (inverse) regularization +strength `C` by :math:`b`. + Multinomial Case ---------------- @@ -933,6 +1000,10 @@ logistic regression, see also `log-linear model especially important when using regularization. The choice of overparameterization can be detrimental for unpenalized models since then the solution may not be unique, as shown in [16]_. +|details-start| +**Mathematical details** +|details-split| + Let :math:`y_i \in {1, \ldots, K}` be the label (ordinal) encoded target variable for observation :math:`i`. Instead of a single coefficient vector, we now have a matrix of coefficients :math:`W` where each row vector :math:`W_k` corresponds to class @@ -943,93 +1014,58 @@ a matrix of coefficients :math:`W` where each row vector :math:`W_k` corresponds The objective for the optimization becomes -.. math:: \min_W -C \sum_{i=1}^n \sum_{k=0}^{K-1} [y_i = k] \log(\hat{p}_k(X_i)) + r(W). +.. math:: + \min_W -\frac{1}{S}\sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik} [y_i = k] \log(\hat{p}_k(X_i)) + + \frac{r(W)}{S C}\,. Where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0` -if :math:`P` is false, otherwise it evaluates to :math:`1`. We currently provide four choices -for the regularization term :math:`r(W)` via the `penalty` argument: +if :math:`P` is false, otherwise it evaluates to :math:`1`. + +Again, :math:`s_{ik}` are the weights assigned by the user (multiplication of sample +weights and class weights) with their sum :math:`S = \sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik}`. 
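One practical consequence of expressing the objective through the
user-supplied weights :math:`s_{ik}` (as above, and as noted earlier for the
binary case) is that rescaling all sample weights by a constant only rescales
the effective `C`. A small numerical check, on a toy three-class problem
chosen purely for illustration::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(n_samples=300, n_features=6, n_informative=4,
                               n_classes=3, random_state=0)

    # Doubling every sample weight should act like doubling C.
    doubled = np.full(y.shape, 2.0)
    clf_weighted = LogisticRegression(C=1.0, tol=1e-8, max_iter=5000).fit(
        X, y, sample_weight=doubled)
    clf_rescaled = LogisticRegression(C=2.0, tol=1e-8, max_iter=5000).fit(X, y)

    # The two fits agree up to the solver tolerance.
    print(np.abs(clf_weighted.coef_ - clf_rescaled.coef_).max())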
+ +We currently provide four choices +for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m` +is the number of features: +----------------+----------------------------------------------------------------------------------+ | penalty | :math:`r(W)` | +================+==================================================================================+ | `None` | :math:`0` | +----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^n\sum_{j=1}^{K}|W_{i,j}|` | +| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` | +----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^n\sum_{j=1}^{K} W_{i,j}^2` | +| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` | +----------------+----------------------------------------------------------------------------------+ | `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` | +----------------+----------------------------------------------------------------------------------+ +|details-end| + Solvers ------- The solvers implemented in the class :class:`LogisticRegression` are "lbfgs", "liblinear", "newton-cg", "newton-cholesky", "sag" and "saga": -The solver "liblinear" uses a coordinate descent (CD) algorithm, and relies -on the excellent C++ `LIBLINEAR library -`_, which is shipped with -scikit-learn. However, the CD algorithm implemented in liblinear cannot learn -a true multinomial (multiclass) model; instead, the optimization problem is -decomposed in a "one-vs-rest" fashion so separate binary classifiers are -trained for all classes. This happens under the hood, so -:class:`LogisticRegression` instances using this solver behave as multiclass -classifiers. For :math:`\ell_1` regularization :func:`sklearn.svm.l1_min_c` allows to -calculate the lower bound for C in order to get a non "null" (all feature -weights to zero) model. - -The "lbfgs", "newton-cg" and "sag" solvers only support :math:`\ell_2` -regularization or no regularization, and are found to converge faster for some -high-dimensional data. Setting `multi_class` to "multinomial" with these solvers -learns a true multinomial logistic regression model [5]_, which means that its -probability estimates should be better calibrated than the default "one-vs-rest" -setting. - -The "sag" solver uses Stochastic Average Gradient descent [6]_. It is faster -than other solvers for large datasets, when both the number of samples and the -number of features are large. - -The "saga" solver [7]_ is a variant of "sag" that also supports the -non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse -multinomial logistic regression. It is also the only solver that supports -`penalty="elasticnet"`. - -The "lbfgs" is an optimization algorithm that approximates the -Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to -quasi-Newton methods. As such, it can deal with a wide range of different training -data and is therefore the default solver. Its performance, however, suffers on poorly -scaled datasets and on datasets with one-hot encoded categorical features with rare -categories. - -The "newton-cholesky" solver is an exact Newton solver that calculates the hessian -matrix and solves the resulting linear system. 
It is a very good choice for -`n_samples` >> `n_features`, but has a few shortcomings: Only :math:`\ell_2` -regularization is supported. Furthermore, because the hessian matrix is explicitly -computed, the memory usage has a quadratic dependency on `n_features` as well as on -`n_classes`. As a consequence, only the one-vs-rest scheme is implemented for the -multiclass case. - -For a comparison of some of these solvers, see [9]_. - -The following table summarizes the penalties supported by each solver: +The following table summarizes the penalties and multinomial multiclass supported by each solver: +------------------------------+-----------------+-------------+-----------------+-----------------------+-----------+------------+ | | **Solvers** | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ | **Penalties** | **'lbfgs'** | **'liblinear'** | **'newton-cg'** | **'newton-cholesky'** | **'sag'** | **'saga'** | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| Multinomial + L2 penalty | yes | no | yes | no | yes | yes | +| L2 penalty | yes | no | yes | no | yes | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| OVR + L2 penalty | yes | yes | yes | yes | yes | yes | +| L1 penalty | no | yes | no | no | no | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| Multinomial + L1 penalty | no | no | no | no | no | yes | +| Elastic-Net (L1 + L2) | no | no | no | no | no | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| OVR + L1 penalty | no | yes | no | no | no | yes | +| No penalty ('none') | yes | no | yes | yes | yes | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| Elastic-Net | no | no | no | no | no | yes | +| **Multiclass support** | | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ -| No penalty ('none') | yes | no | yes | yes | yes | yes | +| multinomial multiclass | yes | no | yes | no | yes | yes | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ | **Behaviors** | | +------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+ @@ -1045,32 +1081,92 @@ the "saga" solver is usually faster. For large dataset, you may also consider using :class:`SGDClassifier` with `loss="log_loss"`, which might be even faster but requires more tuning. -.. topic:: Examples: +.. _liblinear_differences: - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_l1_l2_sparsity.py` +Differences between solvers +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There might be a difference in the scores obtained between +:class:`LogisticRegression` with ``solver=liblinear`` or +:class:`~sklearn.svm.LinearSVC` and the external liblinear library directly, +when ``fit_intercept=False`` and the fit ``coef_`` (or) the data to be predicted +are zeroes. 
This is because for the sample(s) with ``decision_function`` zero, +:class:`LogisticRegression` and :class:`~sklearn.svm.LinearSVC` predict the +negative class, while liblinear predicts the positive class. Note that a model +with ``fit_intercept=False`` and having many samples with ``decision_function`` +zero, is likely to be a underfit, bad model and you are advised to set +``fit_intercept=True`` and increase the ``intercept_scaling``. + +|details-start| +**Solvers' details** +|details-split| + +* The solver "liblinear" uses a coordinate descent (CD) algorithm, and relies + on the excellent C++ `LIBLINEAR library + `_, which is shipped with + scikit-learn. However, the CD algorithm implemented in liblinear cannot learn + a true multinomial (multiclass) model; instead, the optimization problem is + decomposed in a "one-vs-rest" fashion so separate binary classifiers are + trained for all classes. This happens under the hood, so + :class:`LogisticRegression` instances using this solver behave as multiclass + classifiers. For :math:`\ell_1` regularization :func:`sklearn.svm.l1_min_c` allows to + calculate the lower bound for C in order to get a non "null" (all feature + weights to zero) model. + +* The "lbfgs", "newton-cg" and "sag" solvers only support :math:`\ell_2` + regularization or no regularization, and are found to converge faster for some + high-dimensional data. Setting `multi_class` to "multinomial" with these solvers + learns a true multinomial logistic regression model [5]_, which means that its + probability estimates should be better calibrated than the default "one-vs-rest" + setting. + +* The "sag" solver uses Stochastic Average Gradient descent [6]_. It is faster + than other solvers for large datasets, when both the number of samples and the + number of features are large. + +* The "saga" solver [7]_ is a variant of "sag" that also supports the + non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse + multinomial logistic regression. It is also the only solver that supports + `penalty="elasticnet"`. + +* The "lbfgs" is an optimization algorithm that approximates the + Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to + quasi-Newton methods. As such, it can deal with a wide range of different training + data and is therefore the default solver. Its performance, however, suffers on poorly + scaled datasets and on datasets with one-hot encoded categorical features with rare + categories. + +* The "newton-cholesky" solver is an exact Newton solver that calculates the hessian + matrix and solves the resulting linear system. It is a very good choice for + `n_samples` >> `n_features`, but has a few shortcomings: Only :math:`\ell_2` + regularization is supported. Furthermore, because the hessian matrix is explicitly + computed, the memory usage has a quadratic dependency on `n_features` as well as on + `n_classes`. As a consequence, only the one-vs-rest scheme is implemented for the + multiclass case. - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py` +For a comparison of some of these solvers, see [9]_. - * :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py` +.. topic:: References: - * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_20newsgroups.py` + .. [5] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 4.3.4 - * :ref:`sphx_glr_auto_examples_linear_model_plot_sparse_logistic_regression_mnist.py` + .. 
[6] Mark Schmidt, Nicolas Le Roux, and Francis Bach: `Minimizing Finite Sums with the Stochastic Average Gradient. `_ -.. _liblinear_differences: + .. [7] Aaron Defazio, Francis Bach, Simon Lacoste-Julien: + :arxiv:`SAGA: A Fast Incremental Gradient Method With Support for + Non-Strongly Convex Composite Objectives. <1407.0202>` -.. topic:: Differences from liblinear: + .. [8] https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm + + .. [9] Thomas P. Minka `"A comparison of numerical optimizers for logistic regression" + `_ + + .. [16] :arxiv:`Simon, Noah, J. Friedman and T. Hastie. + "A Blockwise Descent Algorithm for Group-penalized Multiresponse and + Multinomial Regression." <1311.6529>` + +|details-end| - There might be a difference in the scores obtained between - :class:`LogisticRegression` with ``solver=liblinear`` - or :class:`LinearSVC` and the external liblinear library directly, - when ``fit_intercept=False`` and the fit ``coef_`` (or) the data to - be predicted are zeroes. This is because for the sample(s) with - ``decision_function`` zero, :class:`LogisticRegression` and :class:`LinearSVC` - predict the negative class, while liblinear predicts the positive class. - Note that a model with ``fit_intercept=False`` and having many samples with - ``decision_function`` zero, is likely to be a underfit, bad model and you are - advised to set ``fit_intercept=True`` and increase the intercept_scaling. .. note:: **Feature selection with sparse logistic regression** @@ -1092,25 +1188,6 @@ according to the ``scoring`` attribute. The "newton-cg", "sag", "saga" and "lbfgs" solvers are found to be faster for high-dimensional dense data, due to warm-starting (see :term:`Glossary `). -.. topic:: References: - - .. [5] Christopher M. Bishop: Pattern Recognition and Machine Learning, Chapter 4.3.4 - - .. [6] Mark Schmidt, Nicolas Le Roux, and Francis Bach: `Minimizing Finite Sums with the Stochastic Average Gradient. `_ - - .. [7] Aaron Defazio, Francis Bach, Simon Lacoste-Julien: - :arxiv:`SAGA: A Fast Incremental Gradient Method With Support for - Non-Strongly Convex Composite Objectives. <1407.0202>` - - .. [8] https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm - - .. [9] Thomas P. Minka `"A comparison of numerical optimizers for logistic regression" - `_ - - .. [16] :arxiv:`Simon, Noah, J. Friedman and T. Hastie. - "A Blockwise Descent Algorithm for Group-penalized Multiresponse and - Multinomial Regression." <1311.6529>` - .. _Generalized_linear_regression: .. 
_Generalized_linear_models: @@ -1145,7 +1222,7 @@ Normal :math:`y \in (-\infty, \infty)` :math:`(y-\hat{y})^2` Bernoulli :math:`y \in \{0, 1\}` :math:`2({y}\log\frac{y}{\hat{y}}+({1}-{y})\log\frac{{1}-{y}}{{1}-\hat{y}})` Categorical :math:`y \in \{0, 1, ..., k\}` :math:`2\sum_{i \in \{0, 1, ..., k\}} I(y = i) y_\text{i}\log\frac{I(y = i)}{\hat{I(y = i)}}` Poisson :math:`y \in [0, \infty)` :math:`2(y\log\frac{y}{\hat{y}}-y+\hat{y})` -Gamma :math:`y \in (0, \infty)` :math:`2(\log\frac{y}{\hat{y}}+\frac{y}{\hat{y}}-1)` +Gamma :math:`y \in (0, \infty)` :math:`2(\log\frac{\hat{y}}{y}+\frac{y}{\hat{y}}-1)` Inverse Gaussian :math:`y \in (0, \infty)` :math:`\frac{(y-\hat{y})^2}{y\hat{y}^2}` ================= ================================ ============================================ @@ -1161,13 +1238,13 @@ in the following figure, mass at :math:`Y=0` for the Poisson distribution and the Tweedie (power=1.5) distribution, but not for the Gamma distribution which has a strictly positive target domain. - + The Bernoulli distribution is a discrete probability distribution modelling a Bernoulli trial - an event that has only two mutually exclusive outcomes. The Categorical distribution is a generalization of the Bernoulli distribution for a categorical random variable. While a random variable in a Bernoulli distribution has two possible outcomes, a Categorical random variable can take -on one of K possible categories, with the probability of each category +on one of K possible categories, with the probability of each category specified separately. The choice of the distribution depends on the problem at hand: @@ -1186,7 +1263,9 @@ The choice of the distribution depends on the problem at hand: used for multiclass classification. -Examples of use cases include: +|details-start| +**Examples of use cases** +|details-split| * Agriculture / weather modeling: number of rain events per year (Poisson), amount of rainfall per event (Gamma), total rainfall per year (Tweedie / @@ -1194,7 +1273,7 @@ Examples of use cases include: * Risk modeling / insurance policy pricing: number of claim events / policyholder per year (Poisson), cost per event (Gamma), total cost per policyholder per year (Tweedie / Compound Poisson Gamma). -* Credit Default: probability that a loan can't be payed back (Bernouli). +* Credit Default: probability that a loan can't be paid back (Bernoulli). * Fraud Detection: probability that a financial transaction like a cash transfer is a fraudulent transaction (Bernoulli). * Predictive maintenance: number of production interruption events per year @@ -1205,15 +1284,17 @@ Examples of use cases include: * News Classification: classification of news articles into three categories namely Business News, Politics and Entertainment news (Categorical). +|details-end| + .. topic:: References: - .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models - and analysis of deviance. Monografias de matemática, no. 51. See also - `Exponential dispersion model. - `_ + .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. 
+ `_ Usage ----- @@ -1247,13 +1328,14 @@ Usage example:: -0.7638... -.. topic:: Examples: +.. topic:: Examples * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` * :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` -Practical considerations ------------------------- +|details-start| +**Practical considerations** +|details-split| The feature matrix `X` should be standardized before fitting. This ensures that the penalty treats features equally. @@ -1276,6 +1358,8 @@ When performing cross-validation for the `power` parameter of because the default scorer :meth:`TweedieRegressor.score` is a function of `power` itself. +|details-end| + Stochastic Gradient Descent - SGD ================================= @@ -1291,9 +1375,7 @@ E.g., with ``loss="log"``, :class:`SGDClassifier` fits a logistic regression model, while with ``loss="hinge"`` it fits a linear support vector machine (SVM). -.. topic:: References - - * :ref:`sgd` +You can refer to the dedicated :ref:`sgd` documentation section for more details. .. _perceptron: @@ -1303,16 +1385,21 @@ Perceptron The :class:`Perceptron` is another simple classification algorithm suitable for large scale learning. By default: - - It does not require a learning rate. +- It does not require a learning rate. - - It is not regularized (penalized). +- It is not regularized (penalized). - - It updates its model only on mistakes. +- It updates its model only on mistakes. The last characteristic implies that the Perceptron is slightly faster to train than SGD with the hinge loss and that the resulting models are sparser. +In fact, the :class:`Perceptron` is a wrapper around the :class:`SGDClassifier` +class using a perceptron loss and a constant learning rate. Refer to +:ref:`mathematical section ` of the SGD procedure +for more details. + .. _passive_aggressive: Passive Aggressive Algorithms @@ -1329,13 +1416,15 @@ For classification, :class:`PassiveAggressiveClassifier` can be used with ``loss='epsilon_insensitive'`` (PA-I) or ``loss='squared_epsilon_insensitive'`` (PA-II). -.. topic:: References: - +|details-start| +**References** +|details-split| - * `"Online Passive-Aggressive Algorithms" - `_ - K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR 7 (2006) +* `"Online Passive-Aggressive Algorithms" + `_ + K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR 7 (2006) +|details-end| Robustness regression: outliers and modeling errors ===================================================== @@ -1394,7 +1483,7 @@ Note that in general, robust fitting in high-dimensional setting (large in these settings. -.. topic:: **Trade-offs: which estimator?** +.. topic:: Trade-offs: which estimator ? Scikit-learn provides 3 robust regression estimators: :ref:`RANSAC `, @@ -1403,7 +1492,7 @@ in these settings. * :ref:`HuberRegressor ` should be faster than :ref:`RANSAC ` and :ref:`Theil Sen ` - unless the number of samples are very large, i.e ``n_samples`` >> ``n_features``. + unless the number of samples are very large, i.e. ``n_samples`` >> ``n_features``. This is because :ref:`RANSAC ` and :ref:`Theil Sen ` fit on smaller subsets of the data. However, both :ref:`Theil Sen ` and :ref:`RANSAC ` are unlikely to be as robust as @@ -1419,7 +1508,7 @@ in these settings. medium-size outliers in the X direction, but this property will disappear in high-dimensional settings. - When in doubt, use :ref:`RANSAC `. + When in doubt, use :ref:`RANSAC `. .. 
_ransac_regression: @@ -1445,17 +1534,23 @@ estimated only from the determined inliers. :align: center :scale: 50% -Details of the algorithm -^^^^^^^^^^^^^^^^^^^^^^^^ +.. topic:: Examples + + * :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py` + * :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` + +|details-start| +**Details of the algorithm** +|details-split| Each iteration performs the following steps: 1. Select ``min_samples`` random samples from the original data and check whether the set of data is valid (see ``is_data_valid``). -2. Fit a model to the random subset (``base_estimator.fit``) and check +2. Fit a model to the random subset (``estimator.fit``) and check whether the estimated model is valid (see ``is_model_valid``). 3. Classify all data as inliers or outliers by calculating the residuals - to the estimated model (``base_estimator.predict(X) - y``) - all data + to the estimated model (``estimator.predict(X) - y``) - all data samples with absolute residuals smaller than or equal to the ``residual_threshold`` are considered as inliers. 4. Save fitted model as best model if number of inlier samples is @@ -1473,22 +1568,22 @@ needed for identifying degenerate cases, ``is_data_valid`` should be used as it is called prior to fitting the model and thus leading to better computational performance. +|details-end| -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py` - * :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` +|details-start| +**References** +|details-split| -.. topic:: References: +* https://en.wikipedia.org/wiki/RANSAC +* `"Random Sample Consensus: A Paradigm for Model Fitting with Applications to + Image Analysis and Automated Cartography" + `_ + Martin A. Fischler and Robert C. Bolles - SRI International (1981) +* `"Performance Evaluation of RANSAC Family" + `_ + Sunglok Choi, Taemin Kim and Wonpil Yu - BMVC (2009) - * https://en.wikipedia.org/wiki/RANSAC - * `"Random Sample Consensus: A Paradigm for Model Fitting with Applications to - Image Analysis and Automated Cartography" - `_ - Martin A. Fischler and Robert C. Bolles - SRI International (1981) - * `"Performance Evaluation of RANSAC Family" - `_ - Sunglok Choi, Taemin Kim and Wonpil Yu - BMVC (2009) +|details-end| .. _theil_sen_regression: @@ -1506,12 +1601,10 @@ better than an ordinary least squares in high dimension. * :ref:`sphx_glr_auto_examples_linear_model_plot_theilsen.py` * :ref:`sphx_glr_auto_examples_linear_model_plot_robust_fit.py` -.. topic:: References: - - * https://en.wikipedia.org/wiki/Theil%E2%80%93Sen_estimator -Theoretical considerations -^^^^^^^^^^^^^^^^^^^^^^^^^^ +|details-start| +**Theoretical considerations** +|details-split| :class:`TheilSenRegressor` is comparable to the :ref:`Ordinary Least Squares (OLS) ` in terms of asymptotic efficiency and as an @@ -1543,15 +1636,16 @@ large number of samples and features. Therefore, the magnitude of a subpopulation can be chosen to limit the time and space complexity by considering only a random subset of all possible combinations. -.. topic:: Examples: +.. topic:: References: - * :ref:`sphx_glr_auto_examples_linear_model_plot_theilsen.py` + .. [#f1] Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang: `Theil-Sen Estimators in a Multiple Linear Regression Model. `_ -.. topic:: References: + .. [#f2] T. Kärkkäinen and S. Äyrämö: `On Computation of Spatial Median for Robust Data Mining. `_ - .. 
[#f1] Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang: `Theil-Sen Estimators in a Multiple Linear Regression Model. `_ + Also see the `Wikipedia page `_ + +|details-end| - .. [#f2] T. Kärkkäinen and S. Äyrämö: `On Computation of Spatial Median for Robust Data Mining. `_ .. _huber_regression: @@ -1570,6 +1664,14 @@ but gives a lesser weight to them. :align: center :scale: 50% +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py` + +|details-start| +**Mathematical details** +|details-split| + The loss function that :class:`HuberRegressor` minimizes is given by .. math:: @@ -1581,14 +1683,20 @@ where .. math:: H_{\epsilon}(z) = \begin{cases} - z^2, & \text {if } |z| < \epsilon, \\ - 2\epsilon|z| - \epsilon^2, & \text{otherwise} + z^2, & \text {if } |z| < \epsilon, \\ + 2\epsilon|z| - \epsilon^2, & \text{otherwise} \end{cases} -It is advised to set the parameter ``epsilon`` to 1.35 to achieve 95% statistical efficiency. +It is advised to set the parameter ``epsilon`` to 1.35 to achieve 95% +statistical efficiency. + +.. topic:: References: + + * Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale + estimates, pg 172 + +|details-end| -Notes ------ The :class:`HuberRegressor` differs from using :class:`SGDRegressor` with loss set to `huber` in the following ways. @@ -1601,14 +1709,6 @@ in the following ways. samples while :class:`SGDRegressor` needs a number of passes on the training data to produce the same robustness. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py` - -.. topic:: References: - - * Peter J. Huber, Elvezio M. Ronchetti: Robust Statistics, Concomitant scale estimates, pg 172 - Note that this estimator is different from the R implementation of Robust Regression (https://stats.oarc.ucla.edu/r/dae/robust-regression/) because the R implementation does a weighted least squares implementation with weights given to each sample on the basis of how much the residual is @@ -1623,6 +1723,37 @@ Quantile regression estimates the median or other quantiles of :math:`y` conditional on :math:`X`, while ordinary least squares (OLS) estimates the conditional mean. +Quantile regression may be useful if one is interested in predicting an +interval instead of point prediction. Sometimes, prediction intervals are +calculated based on the assumption that prediction error is distributed +normally with zero mean and constant variance. Quantile regression provides +sensible prediction intervals even for errors with non-constant (but +predictable) variance or non-normal distribution. + +.. figure:: /auto_examples/linear_model/images/sphx_glr_plot_quantile_regression_002.png + :target: ../auto_examples/linear_model/plot_quantile_regression.html + :align: center + :scale: 50% + +Based on minimizing the pinball loss, conditional quantiles can also be +estimated by models other than linear models. For example, +:class:`~sklearn.ensemble.GradientBoostingRegressor` can predict conditional +quantiles if its parameter ``loss`` is set to ``"quantile"`` and parameter +``alpha`` is set to the quantile that should be predicted. See the example in +:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`. + +Most implementations of quantile regression are based on linear programming +problem. The current implementation is based on +:func:`scipy.optimize.linprog`. + +.. 
topic:: Examples: + + * :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py` + +|details-start| +**Mathematical details** +|details-split| + As a linear model, the :class:`QuantileRegressor` gives linear predictions :math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile, :math:`q \in (0, 1)`. The weights or coefficients :math:`w` are then found by the following @@ -1650,45 +1781,24 @@ As the pinball loss is only linear in the residuals, quantile regression is much more robust to outliers than squared error based estimation of the mean. Somewhat in between is the :class:`HuberRegressor`. -Quantile regression may be useful if one is interested in predicting an -interval instead of point prediction. Sometimes, prediction intervals are -calculated based on the assumption that prediction error is distributed -normally with zero mean and constant variance. Quantile regression provides -sensible prediction intervals even for errors with non-constant (but -predictable) variance or non-normal distribution. - -.. figure:: /auto_examples/linear_model/images/sphx_glr_plot_quantile_regression_002.png - :target: ../auto_examples/linear_model/plot_quantile_regression.html - :align: center - :scale: 50% - -Based on minimizing the pinball loss, conditional quantiles can also be -estimated by models other than linear models. For example, -:class:`~sklearn.ensemble.GradientBoostingRegressor` can predict conditional -quantiles if its parameter ``loss`` is set to ``"quantile"`` and parameter -``alpha`` is set to the quantile that should be predicted. See the example in -:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_quantile.py`. +|details-end| -Most implementations of quantile regression are based on linear programming -problem. The current implementation is based on -:func:`scipy.optimize.linprog`. +|details-start| +**References** +|details-split| -.. topic:: Examples: +* Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles. + `_ + Econometrica: journal of the Econometric Society, 33-50. - * :ref:`sphx_glr_auto_examples_linear_model_plot_quantile_regression.py` +* Portnoy, S., & Koenker, R. (1997). :doi:`The Gaussian hare and the Laplacian + tortoise: computability of squared-error versus absolute-error estimators. + Statistical Science, 12, 279-300 <10.1214/ss/1030037960>`. -.. topic:: References: +* Koenker, R. (2005). :doi:`Quantile Regression <10.1017/CBO9780511754098>`. + Cambridge University Press. - * Koenker, R., & Bassett Jr, G. (1978). `Regression quantiles. - `_ - Econometrica: journal of the Econometric Society, 33-50. - - * Portnoy, S., & Koenker, R. (1997). :doi:`The Gaussian hare and the Laplacian - tortoise: computability of squared-error versus absolute-error estimators. - Statistical Science, 12, 279-300 <10.1214/ss/1030037960>`. - - * Koenker, R. (2005). :doi:`Quantile Regression <10.1017/CBO9780511754098>`. - Cambridge University Press. +|details-end| .. _polynomial_regression: @@ -1703,6 +1813,10 @@ on nonlinear functions of the data. This approach maintains the generally fast performance of linear methods, while allowing them to fit a much wider range of data. +|details-start| +**Mathematical details** +|details-split| + For example, a simple linear regression can be extended by constructing **polynomial features** from the coefficients. In the standard linear regression case, you might have a model that looks like this for @@ -1730,6 +1844,8 @@ and can be solved by the same techniques. 
By considering linear fits within a higher-dimensional space built with these basis functions, the model has the flexibility to fit a much broader range of data. +|details-end| + Here is an example of applying this idea to one-dimensional data, using polynomial features of varying degrees: diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index a92545a01945e..7cc6776e37daa 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -130,8 +130,10 @@ distances between all points. Isomap can be performed with the object :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| + The Isomap algorithm comprises three stages: 1. **Nearest neighbor search.** Isomap uses @@ -162,6 +164,8 @@ The overall complexity of Isomap is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"A global geometric framework for nonlinear dimensionality reduction" @@ -187,8 +191,9 @@ Locally linear embedding can be performed with function :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The standard LLE algorithm comprises three stages: @@ -209,6 +214,8 @@ The overall complexity of standard LLE is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"Nonlinear dimensionality reduction by locally linear embedding" @@ -241,8 +248,9 @@ It requires ``n_neighbors > n_components``. :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The MLLE algorithm comprises three stages: @@ -265,6 +273,8 @@ The overall complexity of MLLE is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" @@ -291,8 +301,9 @@ It requires ``n_neighbors > n_components * (n_components + 3) / 2``. :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The HLLE algorithm comprises three stages: @@ -313,6 +324,8 @@ The overall complexity of standard HLLE is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"Hessian Eigenmaps: Locally linear embedding techniques for @@ -335,8 +348,9 @@ preserving local distances. Spectral embedding can be performed with the function :func:`spectral_embedding` or its object-oriented counterpart :class:`SpectralEmbedding`. -Complexity ----------- +|details-start| +**Complexity** +|details-split| The Spectral Embedding (Laplacian Eigenmaps) algorithm comprises three stages: @@ -358,6 +372,8 @@ The overall complexity of spectral embedding is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"Laplacian Eigenmaps for Dimensionality Reduction @@ -383,8 +399,9 @@ tangent spaces to learn the embedding. LTSA can be performed with function :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The LTSA algorithm comprises three stages: @@ -404,6 +421,8 @@ The overall complexity of standard LTSA is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * :arxiv:`"Principal manifolds and nonlinear dimensionality reduction via @@ -448,8 +467,9 @@ the similarities chosen in some optimal ways. 
The objective, called the stress, is then defined by :math:`\sum_{i < j} d_{ij}(X) - \hat{d}_{ij}(X)` -Metric MDS ----------- +|details-start| +**Metric MDS** +|details-split| The simplest metric :class:`MDS` model, called *absolute MDS*, disparities are defined by :math:`\hat{d}_{ij} = S_{ij}`. With absolute MDS, the value :math:`S_{ij}` @@ -458,8 +478,11 @@ should then correspond exactly to the distance between point :math:`i` and Most commonly, disparities are set to :math:`\hat{d}_{ij} = b S_{ij}`. -Nonmetric MDS -------------- +|details-end| + +|details-start| +**Nonmetric MDS** +|details-split| Non metric :class:`MDS` focuses on the ordination of the data. If :math:`S_{ij} > S_{jk}`, then the embedding should enforce :math:`d_{ij} < @@ -490,6 +513,7 @@ in the metric case. :align: center :scale: 60 +|details-end| .. topic:: References: @@ -551,8 +575,10 @@ The disadvantages to using t-SNE are roughly: :align: center :scale: 50 -Optimizing t-SNE ----------------- +|details-start| +**Optimizing t-SNE** +|details-split| + The main purpose of t-SNE is visualization of high-dimensional data. Hence, it works best when the data will be embedded on two or three dimensions. @@ -601,8 +627,11 @@ but less accurate results. provides a good discussion of the effects of the various parameters, as well as interactive plots to explore the effects of different parameters. -Barnes-Hut t-SNE ----------------- +|details-end| + +|details-start| +**Barnes-Hut t-SNE** +|details-split| The Barnes-Hut t-SNE that has been implemented here is usually much slower than other manifold learning algorithms. The optimization is quite difficult @@ -615,7 +644,7 @@ Barnes-Hut method improves on the exact method where t-SNE complexity is or less. The 2D case is typical when building visualizations. * Barnes-Hut only works with dense input data. Sparse data matrices can only be embedded with the exact method or can be approximated by a dense low rank - projection for instance using :class:`~sklearn.decomposition.TruncatedSVD` + projection for instance using :class:`~sklearn.decomposition.PCA` * Barnes-Hut is an approximation of the exact method. The approximation is parameterized with the angle parameter, therefore the angle parameter is unused when method="exact" @@ -638,11 +667,12 @@ imply that the data cannot be correctly classified by a supervised model. It might be the case that 2 dimensions are not high enough to accurately represent the internal structure of the data. +|details-end| .. topic:: References: * `"Visualizing High-Dimensional Data Using t-SNE" - `_ + `_ van der Maaten, L.J.P.; Hinton, G. Journal of Machine Learning Research (2008) diff --git a/doc/modules/metrics.rst b/doc/modules/metrics.rst index 71e914afad192..caea39319e869 100644 --- a/doc/modules/metrics.rst +++ b/doc/modules/metrics.rst @@ -28,9 +28,9 @@ There are a number of ways to convert between a distance metric and a similarity measure, such as a kernel. Let ``D`` be the distance, and ``S`` be the kernel: - 1. ``S = np.exp(-D * gamma)``, where one heuristic for choosing - ``gamma`` is ``1 / num_features`` - 2. ``S = 1. / (D / np.max(D))`` +1. ``S = np.exp(-D * gamma)``, where one heuristic for choosing + ``gamma`` is ``1 / num_features`` +2. ``S = 1. / (D / np.max(D))`` .. 
currentmodule:: sklearn.metrics @@ -123,8 +123,8 @@ The polynomial kernel is defined as: where: - * ``x``, ``y`` are the input vectors - * ``d`` is the kernel degree +* ``x``, ``y`` are the input vectors +* ``d`` is the kernel degree If :math:`c_0 = 0` the kernel is said to be homogeneous. @@ -143,9 +143,9 @@ activation function). It is defined as: where: - * ``x``, ``y`` are the input vectors - * :math:`\gamma` is known as slope - * :math:`c_0` is known as intercept +* ``x``, ``y`` are the input vectors +* :math:`\gamma` is known as slope +* :math:`c_0` is known as intercept .. _rbf_kernel: @@ -165,14 +165,14 @@ the kernel is known as the Gaussian kernel of variance :math:`\sigma^2`. Laplacian kernel ---------------- -The function :func:`laplacian_kernel` is a variant on the radial basis +The function :func:`laplacian_kernel` is a variant on the radial basis function kernel defined as: .. math:: k(x, y) = \exp( -\gamma \| x-y \|_1) -where ``x`` and ``y`` are the input vectors and :math:`\|x-y\|_1` is the +where ``x`` and ``y`` are the input vectors and :math:`\|x-y\|_1` is the Manhattan distance between the input vectors. It has proven useful in ML applied to noiseless data. @@ -229,4 +229,3 @@ The chi squared kernel is most commonly used on histograms (bags) of visual word categories: A comprehensive study International Journal of Computer Vision 2007 https://hal.archives-ouvertes.fr/hal-00171412/document - diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst index 693a2c7793823..df5d8020a1369 100644 --- a/doc/modules/mixture.rst +++ b/doc/modules/mixture.rst @@ -14,13 +14,13 @@ matrices supported), sample them, and estimate them from data. Facilities to help determine the appropriate number of components are also provided. - .. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_pdf_001.png - :target: ../auto_examples/mixture/plot_gmm_pdf.html - :align: center - :scale: 50% +.. figure:: ../auto_examples/mixture/images/sphx_glr_plot_gmm_pdf_001.png + :target: ../auto_examples/mixture/plot_gmm_pdf.html + :align: center + :scale: 50% - **Two-component Gaussian mixture model:** *data points, and equi-probability - surfaces of the model.* + **Two-component Gaussian mixture model:** *data points, and equi-probability + surfaces of the model.* A Gaussian mixture model is a probabilistic model that assumes all the data points are generated from a mixture of a finite number of @@ -43,7 +43,7 @@ confidence ellipsoids for multivariate models, and compute the Bayesian Information Criterion to assess the number of clusters in the data. A :meth:`GaussianMixture.fit` method is provided that learns a Gaussian Mixture Model from train data. Given test data, it can assign to each -sample the Gaussian it mostly probably belongs to using +sample the Gaussian it most probably belongs to using the :meth:`GaussianMixture.predict` method. .. @@ -68,33 +68,36 @@ full covariance. * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_pdf.py` for an example on plotting the density estimation. -Pros and cons of class :class:`GaussianMixture` ------------------------------------------------ +|details-start| +**Pros and cons of class GaussianMixture** +|details-split| + +.. topic:: Pros: + + :Speed: It is the fastest algorithm for learning mixture models -Pros -.... + :Agnostic: As this algorithm maximizes only the likelihood, it + will not bias the means towards zero, or bias the cluster sizes to + have specific structures that might or might not apply. 
-:Speed: It is the fastest algorithm for learning mixture models +.. topic:: Cons: -:Agnostic: As this algorithm maximizes only the likelihood, it - will not bias the means towards zero, or bias the cluster sizes to - have specific structures that might or might not apply. + :Singularities: When one has insufficiently many points per + mixture, estimating the covariance matrices becomes difficult, + and the algorithm is known to diverge and find solutions with + infinite likelihood unless one regularizes the covariances artificially. -Cons -.... + :Number of components: This algorithm will always use all the + components it has access to, needing held-out data + or information theoretical criteria to decide how many components to use + in the absence of external cues. -:Singularities: When one has insufficiently many points per - mixture, estimating the covariance matrices becomes difficult, - and the algorithm is known to diverge and find solutions with - infinite likelihood unless one regularizes the covariances artificially. +|details-end| -:Number of components: This algorithm will always use all the - components it has access to, needing held-out data - or information theoretical criteria to decide how many components to use - in the absence of external cues. -Selecting the number of components in a classical Gaussian Mixture Model ------------------------------------------------------------------------- +|details-start| +**Selecting the number of components in a classical Gaussian Mixture model** +|details-split| The BIC criterion can be used to select the number of components in a Gaussian Mixture in an efficient way. In theory, it recovers the true number of @@ -114,10 +117,13 @@ model. * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py` for an example of model selection performed with classical Gaussian mixture. +|details-end| + .. _expectation_maximization: -Estimation algorithm Expectation-maximization ------------------------------------------------ +|details-start| +**Estimation algorithm expectation-maximization** +|details-split| The main difficulty in learning Gaussian mixture models from unlabeled data is that one usually doesn't know which points came from @@ -135,8 +141,11 @@ parameters to maximize the likelihood of the data given those assignments. Repeating this process is guaranteed to always converge to a local optimum. -Choice of the Initialization Method ------------------------------------ +|details-end| + +|details-start| +**Choice of the Initialization method** +|details-split| There is a choice of four initialization methods (as well as inputting user defined initial means) to generate the initial centers for the model components: @@ -172,6 +181,8 @@ random * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of using different initializations in Gaussian Mixture. +|details-end| + .. _bgmm: Variational Bayesian Gaussian Mixture @@ -183,8 +194,7 @@ similar to the one defined by :class:`GaussianMixture`. .. _variational_inference: -Estimation algorithm: variational inference ---------------------------------------------- +**Estimation algorithm: variational inference** Variational inference is an extension of expectation-maximization that maximizes a lower bound on model evidence (including @@ -282,48 +292,47 @@ from the two resulting mixtures. ``weight_concentration_prior_type`` for different values of the parameter ``weight_concentration_prior``. 
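In the same spirit as the examples above, a minimal sketch (toy blob data and
a prior value picked only for illustration) of the typical effect of a small
``weight_concentration_prior`` with a Dirichlet process prior: when
``n_components`` is deliberately set too high, most of the fitted ``weights_``
shrink towards zero, although the exact number of active components depends on
the data and on the prior::

    import numpy as np
    from sklearn.datasets import make_blobs
    from sklearn.mixture import BayesianGaussianMixture

    X, _ = make_blobs(n_samples=500, centers=3, cluster_std=0.8, random_state=0)

    bgmm = BayesianGaussianMixture(
        n_components=10,                                   # deliberately too many
        weight_concentration_prior_type="dirichlet_process",
        weight_concentration_prior=1e-3,                   # small prior favours few active components
        max_iter=500,
        random_state=0,
    ).fit(X)

    # Only a few of the 10 available components keep a non-negligible weight.
    print(np.round(np.sort(bgmm.weights_)[::-1], 3))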
+|details-start| +**Pros and cons of variational inference with BayesianGaussianMixture** +|details-split| -Pros and cons of variational inference with :class:`BayesianGaussianMixture` ----------------------------------------------------------------------------- - -Pros -..... +.. topic:: Pros: -:Automatic selection: when ``weight_concentration_prior`` is small enough and - ``n_components`` is larger than what is found necessary by the model, the - Variational Bayesian mixture model has a natural tendency to set some mixture - weights values close to zero. This makes it possible to let the model choose - a suitable number of effective components automatically. Only an upper bound - of this number needs to be provided. Note however that the "ideal" number of - active components is very application specific and is typically ill-defined - in a data exploration setting. + :Automatic selection: when ``weight_concentration_prior`` is small enough and + ``n_components`` is larger than what is found necessary by the model, the + Variational Bayesian mixture model has a natural tendency to set some mixture + weights values close to zero. This makes it possible to let the model choose + a suitable number of effective components automatically. Only an upper bound + of this number needs to be provided. Note however that the "ideal" number of + active components is very application specific and is typically ill-defined + in a data exploration setting. -:Less sensitivity to the number of parameters: unlike finite models, which will - almost always use all components as much as they can, and hence will produce - wildly different solutions for different numbers of components, the - variational inference with a Dirichlet process prior - (``weight_concentration_prior_type='dirichlet_process'``) won't change much - with changes to the parameters, leading to more stability and less tuning. + :Less sensitivity to the number of parameters: unlike finite models, which will + almost always use all components as much as they can, and hence will produce + wildly different solutions for different numbers of components, the + variational inference with a Dirichlet process prior + (``weight_concentration_prior_type='dirichlet_process'``) won't change much + with changes to the parameters, leading to more stability and less tuning. -:Regularization: due to the incorporation of prior information, - variational solutions have less pathological special cases than - expectation-maximization solutions. + :Regularization: due to the incorporation of prior information, + variational solutions have less pathological special cases than + expectation-maximization solutions. -Cons -..... +.. topic:: Cons: -:Speed: the extra parametrization necessary for variational inference makes - inference slower, although not by much. + :Speed: the extra parametrization necessary for variational inference makes + inference slower, although not by much. -:Hyperparameters: this algorithm needs an extra hyperparameter - that might need experimental tuning via cross-validation. + :Hyperparameters: this algorithm needs an extra hyperparameter + that might need experimental tuning via cross-validation. -:Bias: there are many implicit biases in the inference algorithms (and also in - the Dirichlet process if used), and whenever there is a mismatch between - these biases and the data it might be possible to fit better models using a - finite mixture. 
+ :Bias: there are many implicit biases in the inference algorithms (and also in + the Dirichlet process if used), and whenever there is a mismatch between + these biases and the data it might be possible to fit better models using a + finite mixture. +|details-end| .. _dirichlet_process: diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 1788fc806ab53..056bf9a56d42c 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -77,6 +77,7 @@ Scoring Function 'roc_auc_ovo' :func:`metrics.roc_auc_score` 'roc_auc_ovr_weighted' :func:`metrics.roc_auc_score` 'roc_auc_ovo_weighted' :func:`metrics.roc_auc_score` +'d2_log_loss_score' :func:`metrics.d2_log_loss_score` **Clustering** 'adjusted_mutual_info_score' :func:`metrics.adjusted_mutual_info_score` @@ -94,19 +95,17 @@ Scoring Function 'max_error' :func:`metrics.max_error` 'neg_mean_absolute_error' :func:`metrics.mean_absolute_error` 'neg_mean_squared_error' :func:`metrics.mean_squared_error` -'neg_root_mean_squared_error' :func:`metrics.mean_squared_error` +'neg_root_mean_squared_error' :func:`metrics.root_mean_squared_error` 'neg_mean_squared_log_error' :func:`metrics.mean_squared_log_error` +'neg_root_mean_squared_log_error' :func:`metrics.root_mean_squared_log_error` 'neg_median_absolute_error' :func:`metrics.median_absolute_error` 'r2' :func:`metrics.r2_score` 'neg_mean_poisson_deviance' :func:`metrics.mean_poisson_deviance` 'neg_mean_gamma_deviance' :func:`metrics.mean_gamma_deviance` 'neg_mean_absolute_percentage_error' :func:`metrics.mean_absolute_percentage_error` -'d2_absolute_error_score' :func:`metrics.d2_absolute_error_score` -'d2_pinball_score' :func:`metrics.d2_pinball_score` -'d2_tweedie_score' :func:`metrics.d2_tweedie_score` +'d2_absolute_error_score' :func:`metrics.d2_absolute_error_score` ==================================== ============================================== ================================== - Usage examples: >>> from sklearn import svm, datasets @@ -115,17 +114,11 @@ Usage examples: >>> clf = svm.SVC(random_state=0) >>> cross_val_score(clf, X, y, cv=5, scoring='recall_macro') array([0.96..., 0.96..., 0.96..., 0.93..., 1. ]) - >>> model = svm.SVC() - >>> cross_val_score(model, X, y, cv=5, scoring='wrong_choice') - Traceback (most recent call last): - ValueError: 'wrong_choice' is not a valid scoring value. Use - sklearn.metrics.get_scorer_names() to get valid options. .. note:: - The values listed by the ``ValueError`` exception correspond to the - functions measuring prediction accuracy described in the following - sections. You can retrieve the names of all available scorers by calling + If a wrong scoring name is passed, an ``InvalidParameterError`` is raised. + You can retrieve the names of all available scorers by calling :func:`~sklearn.metrics.get_scorer_names`. .. currentmodule:: sklearn.metrics @@ -135,38 +128,54 @@ Usage examples: Defining your scoring strategy from metric functions ----------------------------------------------------- +The following metrics functions are not implemented as named scorers, +sometimes because they require additional parameters, such as +:func:`fbeta_score`. They cannot be passed to the ``scoring`` +parameters; instead their callable needs to be passed to +:func:`make_scorer` together with the value of the user-settable +parameters. 
+ +===================================== ========= ============================================== +Function Parameter Example usage +===================================== ========= ============================================== +**Classification** +:func:`metrics.fbeta_score` ``beta`` ``make_scorer(fbeta_score, beta=2)`` + +**Regression** +:func:`metrics.mean_tweedie_deviance` ``power`` ``make_scorer(mean_tweedie_deviance, power=1.5)`` +:func:`metrics.mean_pinball_loss` ``alpha`` ``make_scorer(mean_pinball_loss, alpha=0.95)`` +:func:`metrics.d2_tweedie_score` ``power`` ``make_scorer(d2_tweedie_score, power=1.5)`` +:func:`metrics.d2_pinball_score` ``alpha`` ``make_scorer(d2_pinball_score, alpha=0.95)`` +===================================== ========= ============================================== + +One typical use case is to wrap an existing metric function from the library +with non-default values for its parameters, such as the ``beta`` parameter for +the :func:`fbeta_score` function:: + + >>> from sklearn.metrics import fbeta_score, make_scorer + >>> ftwo_scorer = make_scorer(fbeta_score, beta=2) + >>> from sklearn.model_selection import GridSearchCV + >>> from sklearn.svm import LinearSVC + >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, + ... scoring=ftwo_scorer, cv=5) + The module :mod:`sklearn.metrics` also exposes a set of simple functions measuring a prediction error given ground truth and prediction: - functions ending with ``_score`` return a value to maximize, the higher the better. -- functions ending with ``_error`` or ``_loss`` return a +- functions ending with ``_error``, ``_loss``, or ``_deviance`` return a value to minimize, the lower the better. When converting into a scorer object using :func:`make_scorer`, set the ``greater_is_better`` parameter to ``False`` (``True`` by default; see the parameter description below). -Metrics available for various machine learning tasks are detailed in sections -below. - -Many metrics are not given names to be used as ``scoring`` values, -sometimes because they require additional parameters, such as -:func:`fbeta_score`. In such cases, you need to generate an appropriate -scoring object. The simplest way to generate a callable object for scoring -is by using :func:`make_scorer`. That function converts metrics -into callables that can be used for model evaluation. -One typical use case is to wrap an existing metric function from the library -with non-default values for its parameters, such as the ``beta`` parameter for -the :func:`fbeta_score` function:: +|details-start| +**Custom scorer objects** +|details-split| - >>> from sklearn.metrics import fbeta_score, make_scorer - >>> ftwo_scorer = make_scorer(fbeta_score, beta=2) - >>> from sklearn.model_selection import GridSearchCV - >>> from sklearn.svm import LinearSVC - >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, - ... scoring=ftwo_scorer, cv=5) The second use case is to build a completely custom scorer object from a simple python function using :func:`make_scorer`, which can @@ -180,9 +189,15 @@ take several parameters: of the python function is negated by the scorer object, conforming to the cross validation convention that scorers return higher values for better models. -* for classification metrics only: whether the python function you provided requires continuous decision - certainties (``needs_threshold=True``). The default value is - False. 
+* for classification metrics only: whether the python function you provided requires + continuous decision certainties. If the scoring function only accepts probability + estimates (e.g. :func:`metrics.log_loss`) then one needs to set the parameter + `response_method`, thus in this case `response_method="predict_proba"`. Some scoring + function do not necessarily require probability estimates but rather non-thresholded + decision values (e.g. :func:`metrics.roc_auc_score`). In this case, one provides a + list such as `response_method=["decision_function", "predict_proba"]`. In this case, + the scorer will use the first available method, in the order given in the list, + to compute the scores. * any additional parameters, such as ``beta`` or ``labels`` in :func:`f1_score`. @@ -208,13 +223,21 @@ Here is an example of building custom scorers, and of using the >>> score(clf, X, y) -0.69... +|details-end| .. _diy_scoring: Implementing your own scoring object ------------------------------------ + You can generate even more flexible model scorers by constructing your own scoring object from scratch, without using the :func:`make_scorer` factory. + + +|details-start| +**How to build a scorer from scratch** +|details-split| + For a callable to be a scorer, it needs to meet the protocol specified by the following two rules: @@ -228,6 +251,14 @@ the following two rules: Again, by convention higher numbers are better, so if your scorer returns loss, that value should be negated. +- Advanced: If it requires extra metadata to be passed to it, it should expose + a ``get_metadata_routing`` method returning the requested metadata. The user + should be able to set the requested metadata via a ``set_score_request`` + method. Please see :ref:`User Guide ` and :ref:`Developer + Guide ` for + more details. + + .. note:: **Using custom scorers in functions where n_jobs > 1** While defining the custom scoring function alongside the calling function @@ -247,6 +278,8 @@ the following two rules: ... cv=5, ... n_jobs=-1) # doctest: +SKIP +|details-end| + .. _multimetric_scoring: Using multiple metric evaluation @@ -345,6 +378,7 @@ Some also work in the multilabel case: recall_score roc_auc_score zero_one_loss + d2_log_loss_score And some work with binary and multilabel (but not multiclass) problems: @@ -433,7 +467,7 @@ where :math:`1(x)` is the `indicator function >>> accuracy_score(y_true, y_pred) 0.5 >>> accuracy_score(y_true, y_pred, normalize=False) - 2 + 2.0 In the multilabel case with binary label indicators:: @@ -801,10 +835,12 @@ score: recall_score Note that the :func:`precision_recall_curve` function is restricted to the -binary case. The :func:`average_precision_score` function works only in -binary classification and multilabel indicator format. -The :func:`PredictionRecallDisplay.from_estimator` and -:func:`PredictionRecallDisplay.from_predictions` functions will plot the +binary case. The :func:`average_precision_score` function supports multiclass +and multilabel formats by computing each class score in a One-vs-the-rest (OvR) +fashion and averaging them or not depending of its ``average`` argument value. + +The :func:`PrecisionRecallDisplay.from_estimator` and +:func:`PrecisionRecallDisplay.from_predictions` functions will plot the precision-recall curve as follows. .. image:: ../auto_examples/model_selection/images/sphx_glr_plot_precision_recall_001.png @@ -822,7 +858,6 @@ precision-recall curve as follows. 
for an example of :func:`precision_recall_curve` usage to evaluate classifier output quality. - .. topic:: References: .. [Manning2008] C.D. Manning, P. Raghavan, H. Schütze, `Introduction to Information Retrieval @@ -839,7 +874,6 @@ precision-recall curve as follows. `_, NIPS 2015. - Binary classification ^^^^^^^^^^^^^^^^^^^^^ @@ -859,22 +893,36 @@ following table: | | Missing result | Correct absence of result| +-------------------+---------------------+--------------------------+ -In this context, we can define the notions of precision, recall and F-measure: +In this context, we can define the notions of precision and recall: .. math:: - \text{precision} = \frac{tp}{tp + fp}, + \text{precision} = \frac{\text{tp}}{\text{tp} + \text{fp}}, .. math:: - \text{recall} = \frac{tp}{tp + fn}, + \text{recall} = \frac{\text{tp}}{\text{tp} + \text{fn}}, + +(Sometimes recall is also called ''sensitivity'') + +F-measure is the weighted harmonic mean of precision and recall, with precision's +contribution to the mean weighted by some parameter :math:`\beta`: .. math:: - F_\beta = (1 + \beta^2) \frac{\text{precision} \times \text{recall}}{\beta^2 \text{precision} + \text{recall}}. + F_\beta = (1 + \beta^2) \frac{\text{precision} \times \text{recall}}{\beta^2 \text{precision} + \text{recall}} + +To avoid division by zero when precision and recall are zero, Scikit-Learn calculates F-measure with this +otherwise-equivalent formula: + +.. math:: -Sometimes recall is also called ''sensitivity''. + F_\beta = \frac{(1 + \beta^2) \text{tp}}{(1 + \beta^2) \text{tp} + \text{fp} + \beta^2 \text{fn}} +Note that this formula is still undefined when there are no true positives, false +positives, or false negatives. By default, F-1 for a set of exclusively true negatives +is calculated as 0, however this behavior can be changed using the `zero_division` +parameter. Here are some small examples in binary classification:: >>> from sklearn import metrics @@ -919,13 +967,20 @@ In a multiclass and multilabel classification task, the notions of precision, recall, and F-measures can be applied to each label independently. There are a few ways to combine results across labels, specified by the ``average`` argument to the -:func:`average_precision_score` (multilabel only), :func:`f1_score`, +:func:`average_precision_score`, :func:`f1_score`, :func:`fbeta_score`, :func:`precision_recall_fscore_support`, :func:`precision_score` and :func:`recall_score` functions, as described -:ref:`above `. Note that if all labels are included, "micro"-averaging -in a multiclass setting will produce precision, recall and :math:`F` -that are all identical to accuracy. Also note that "weighted" averaging may -produce an F-score that is not between precision and recall. +:ref:`above `. + +Note the following behaviors when averaging: + +* If all labels are included, "micro"-averaging in a multiclass setting will produce + precision, recall and :math:`F` that are all identical to accuracy. +* "weighted" averaging may produce a F-score that is not between precision and recall. +* "macro" averaging for F-measures is calculated as the arithmetic mean over + per-label/class F-measures, not the harmonic mean over the arithmetic precision and + recall means. Both calculations can be seen in the literature but are not equivalent, + see [OB2019]_ for details. 
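+
+As a small sketch of the first point above (the labels below are an arbitrary
+toy example, not taken from this guide), micro-averaged precision, recall and
+F-measure all coincide with the accuracy in a multiclass setting::
+
+  >>> from sklearn import metrics
+  >>> y_true = [0, 1, 2, 0, 1, 2]
+  >>> y_pred = [0, 2, 1, 0, 0, 1]
+  >>> metrics.accuracy_score(y_true, y_pred)
+  0.33...
+  >>> metrics.precision_score(y_true, y_pred, average='micro')
+  0.33...
+  >>> metrics.f1_score(y_true, y_pred, average='micro')
+  0.33...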
To make this more explicit, consider the following notation: @@ -986,6 +1041,11 @@ Similarly, labels not present in the data sample may be accounted for in macro-a >>> metrics.precision_score(y_true, y_pred, labels=[0, 1, 2, 3], average='macro') 0.166... +.. topic:: References: + + .. [OB2019] :arxiv:`Opitz, J., & Burst, S. (2019). "Macro f1 and macro f1." + <1911.03347>` + .. _jaccard_similarity_score: Jaccard similarity coefficient score @@ -1066,7 +1126,7 @@ output by the `decision_function` method), then the hinge loss is defined as: If there are more than two labels, :func:`hinge_loss` uses a multiclass variant due to Crammer & Singer. -`Here `_ is +`Here `_ is the paper describing it. In this case the predicted decision is an array of shape (`n_samples`, @@ -1366,7 +1426,7 @@ function:: >>> tpr array([0. , 0.5, 0.5, 1. , 1. ]) >>> thresholds - array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ]) + array([ inf, 0.8 , 0.4 , 0.35, 0.1 ]) Compared to metrics such as the subset accuracy, the Hamming loss, or the F1 score, ROC doesn't require optimizing a threshold for each label. @@ -1436,7 +1496,11 @@ correspond to the probability estimates that a sample belongs to a particular class. The OvO and OvR algorithms support weighting uniformly (``average='macro'``) and by prevalence (``average='weighted'``). -**One-vs-one Algorithm**: Computes the average AUC of all possible pairwise +|details-start| +**One-vs-one Algorithm** +|details-split| + +Computes the average AUC of all possible pairwise combinations of classes. [HT2001]_ defines a multiclass AUC metric weighted uniformly: @@ -1465,7 +1529,13 @@ the keyword argument ``multiclass`` to ``'ovo'`` and ``average`` to ``'weighted'``. The ``'weighted'`` option returns a prevalence-weighted average as described in [FC2009]_. -**One-vs-rest Algorithm**: Computes the AUC of each class against the rest +|details-end| + +|details-start| +**One-vs-rest Algorithm** +|details-split| + +Computes the AUC of each class against the rest [PD2000]_. The algorithm is functionally the same as the multilabel case. To enable this algorithm set the keyword argument ``multiclass`` to ``'ovr'``. Additionally to ``'macro'`` [F2006]_ and ``'weighted'`` [F2001]_ averaging, OvR @@ -1476,7 +1546,7 @@ In applications where a high false positive rate is not tolerable the parameter to the given limit. The following figure shows the micro-averaged ROC curve and its corresponding -ROC-AUC score for a classifier aimed to distinguish the the different species in +ROC-AUC score for a classifier aimed to distinguish the different species in the :ref:`iris_dataset`: .. image:: ../auto_examples/model_selection/images/sphx_glr_plot_roc_002.png @@ -1484,6 +1554,8 @@ the :ref:`iris_dataset`: :scale: 75 :align: center +|details-end| + .. _roc_auc_multilabel: Multi-label case @@ -1531,23 +1603,25 @@ And the decision values do not require such processing. .. [HT2001] Hand, D.J. and Till, R.J., (2001). `A simple generalisation of the area under the ROC curve for multiple class classification problems. `_ - Machine learning, 45(2), pp.171-186. + Machine learning, 45(2), pp. 171-186. .. [FC2009] Ferri, Cèsar & Hernandez-Orallo, Jose & Modroiu, R. (2009). `An Experimental Comparison of Performance Measures for Classification. `_ Pattern Recognition Letters. 30. 27-38. - .. [PD2000] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving - probability estimation trees (Section 6.2), CeDER Working Paper #IS-00-04, - Stern School of Business, New York University. + .. 
[PD2000] Provost, F., Domingos, P. (2000). `Well-trained PETs: Improving + probability estimation trees + `_ + (Section 6.2), CeDER Working Paper #IS-00-04, Stern School of Business, + New York University. .. [F2006] Fawcett, T., 2006. `An introduction to ROC analysis. `_ Pattern Recognition Letters, 27(8), pp. 861-874. .. [F2001] Fawcett, T., 2001. `Using rule sets to maximize - ROC performance `_ + ROC performance `_ In Data Mining, 2001. Proceedings IEEE International Conference, pp. 131-138. @@ -1585,7 +1659,15 @@ same classification task: :scale: 75 :align: center -**Properties:** +.. topic:: Examples: + + * See :ref:`sphx_glr_auto_examples_model_selection_plot_det.py` + for an example comparison between receiver operating characteristic (ROC) + curves and Detection error tradeoff (DET) curves. + +|details-start| +**Properties** +|details-split| * DET curves form a linear curve in normal deviate scale if the detection scores are normally (or close-to normally) distributed. @@ -1601,7 +1683,11 @@ same classification task: of perfection for DET curves is the origin (in contrast to the top left corner for ROC curves). -**Applications and limitations:** +|details-end| + +|details-start| +**Applications and limitations** +|details-split| DET curves are intuitive to read and hence allow quick visual assessment of a classifier's performance. @@ -1614,11 +1700,7 @@ Therefore for either automated evaluation or comparison to other classification tasks metrics like the derived area under ROC curve might be better suited. -.. topic:: Examples: - - * See :ref:`sphx_glr_auto_examples_model_selection_plot_det.py` - for an example comparison between receiver operating characteristic (ROC) - curves and Detection error tradeoff (DET) curves. +|details-end| .. topic:: References: @@ -1674,7 +1756,7 @@ loss can also be computed as :math:`zero-one loss = 1 - accuracy`. >>> zero_one_loss(y_true, y_pred) 0.25 >>> zero_one_loss(y_true, y_pred, normalize=False) - 1 + 1.0 In the multilabel case with binary label indicators, where the first label set [0,1] has an error:: @@ -1683,7 +1765,7 @@ set [0,1] has an error:: 0.5 >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)), normalize=False) - 1 + 1.0 .. topic:: Example: @@ -1819,7 +1901,13 @@ counts ``tp`` (see `the wikipedia page `_ for the actual formulas). -**Interpretation across varying prevalence:** +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_model_selection_plot_likelihood_ratios.py` + +|details-start| +**Interpretation across varying prevalence** +|details-split| Both class likelihood ratios are interpretable in terms of an odds ratio (pre-test and post-tests): @@ -1854,7 +1942,11 @@ prediction: \text{post-test probability} = \frac{\text{post-test odds}}{1 + \text{post-test odds}}. -**Mathematical divergences:** +|details-end| + +|details-start| +**Mathematical divergences** +|details-split| The positive likelihood ratio is undefined when :math:`fp = 0`, which can be interpreted as the classifier perfectly identifying positive cases. If :math:`fp @@ -1880,11 +1972,11 @@ averaging over cross-validation folds. For a worked-out demonstration of the :func:`class_likelihood_ratios` function, see the example below. -.. topic:: Examples: +|details-end| - * :ref:`sphx_glr_auto_examples_model_selection_plot_likelihood_ratios.py` - -.. topic:: References: +|details-start| +**References** +|details-split| * `Wikipedia entry for Likelihood ratios in diagnostic testing `_ @@ -1894,6 +1986,72 @@ see the example below. 
values with disease prevalence. Statistics in medicine, 16(9), 981-991. +|details-end| + +.. _d2_score_classification: + +D² score for classification +--------------------------- + +The D² score computes the fraction of deviance explained. +It is a generalization of R², where the squared error is generalized and replaced +by a classification deviance of choice :math:`\text{dev}(y, \hat{y})` +(e.g., Log loss). D² is a form of a *skill score*. +It is calculated as + +.. math:: + + D^2(y, \hat{y}) = 1 - \frac{\text{dev}(y, \hat{y})}{\text{dev}(y, y_{\text{null}})} \,. + +Where :math:`y_{\text{null}}` is the optimal prediction of an intercept-only model +(e.g., the per-class proportion of `y_true` in the case of the Log loss). + +Like R², the best possible score is 1.0 and it can be negative (because the +model can be arbitrarily worse). A constant model that always predicts +:math:`y_{\text{null}}`, disregarding the input features, would get a D² score +of 0.0. + +|details-start| +**D2 log loss score** +|details-split| + +The :func:`d2_log_loss_score` function implements the special case +of D² with the log loss, see :ref:`log_loss`, i.e.: + +.. math:: + + \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}). + +Here are some usage examples of the :func:`d2_log_loss_score` function:: + + >>> from sklearn.metrics import d2_log_loss_score + >>> y_true = [1, 1, 2, 3] + >>> y_pred = [ + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.0 + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.98, 0.01, 0.01], + ... [0.01, 0.98, 0.01], + ... [0.01, 0.01, 0.98], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.981... + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.1, 0.6, 0.3], + ... [0.1, 0.6, 0.3], + ... [0.4, 0.5, 0.1], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + -0.552... + +|details-end| .. _multilabel_ranking_metrics: @@ -2033,11 +2191,15 @@ Here is a small example of usage of this function:: 0.0 -.. topic:: References: +|details-start| +**References** +|details-split| * Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and knowledge discovery handbook (pp. 667-685). Springer US. +|details-end| + .. _ndcg: Normalized Discounted Cumulative Gain @@ -2082,7 +2244,9 @@ DCG score is and the NDCG score is the DCG score divided by the DCG score obtained for :math:`y`. -.. topic:: References: +|details-start| +**References** +|details-split| * `Wikipedia entry for Discounted Cumulative Gain `_ @@ -2100,6 +2264,8 @@ and the NDCG score is the DCG score divided by the DCG score obtained for European conference on information retrieval (pp. 414-421). Springer, Berlin, Heidelberg. +|details-end| + .. _regression_metrics: Regression metrics @@ -2131,9 +2297,6 @@ leads to a weighting of each individual score by the variance of the corresponding target variable. This setting quantifies the globally captured unscaled variance. If the target variables are of different scale, then this score puts more importance on explaining the higher variance variables. -``multioutput='variance_weighted'`` is the default value for :func:`r2_score` -for backward compatibility. This will be changed to ``uniform_average`` in the -future. .. _r2_score: @@ -2288,6 +2451,10 @@ function:: for an example of mean squared error usage to evaluate gradient boosting regression. 
+Taking the square root of the MSE, called the root mean squared error (RMSE), is another
+common metric that provides a measure in the same units as the target variable. RMSE is
+available through the :func:`root_mean_squared_error` function.
+
.. _mean_squared_log_error:

Mean squared logarithmic error
@@ -2325,6 +2492,9 @@ function::

  >>> mean_squared_log_error(y_true, y_pred)
  0.044...

+The root mean squared logarithmic error (RMSLE) is available through the
+:func:`root_mean_squared_log_error` function.
+
.. _mean_absolute_percentage_error:

Mean absolute percentage error
------------------------------
@@ -2648,8 +2818,9 @@ model can be arbitrarily worse). A constant model that always predicts
:math:`y_{\text{null}}`, disregarding the input features, would get a D² score
of 0.0.

-D² Tweedie score
-^^^^^^^^^^^^^^^^
+|details-start|
+**D² Tweedie score**
+|details-split|

The :func:`d2_tweedie_score` function implements the special case of D²
where :math:`\text{dev}(y, \hat{y})` is the Tweedie deviance, see :ref:`mean_tweedie_deviance`.
@@ -2664,8 +2835,11 @@ A scorer object with a specific choice of ``power`` can be built by::

  >>> from sklearn.metrics import d2_tweedie_score, make_scorer
  >>> d2_tweedie_score_15 = make_scorer(d2_tweedie_score, power=1.5)

-D² pinball score
-^^^^^^^^^^^^^^^^^^^^^
+|details-end|
+
+|details-start|
+**D² pinball score**
+|details-split|

The :func:`d2_pinball_score` function implements the special case
of D² with the pinball loss, see :ref:`pinball_loss`, i.e.:
@@ -2685,8 +2859,11 @@ A scorer object with a specific choice of ``alpha`` can be built by::

  >>> from sklearn.metrics import d2_pinball_score, make_scorer
  >>> d2_pinball_score_08 = make_scorer(d2_pinball_score, alpha=0.8)

-D² absolute error score
-^^^^^^^^^^^^^^^^^^^^^^^
+|details-end|
+
+|details-start|
+**D² absolute error score**
+|details-split|

The :func:`d2_absolute_error_score` function implements the special case of
the :ref:`mean_absolute_error`:
@@ -2711,6 +2888,8 @@ Here are some usage examples of the :func:`d2_absolute_error_score` function::

  >>> d2_absolute_error_score(y_true, y_pred)
  0.0

+|details-end|
+
.. _visualization_regression_evaluation:

Visual evaluation of regression models
--------------------------------------
@@ -2763,8 +2942,8 @@ model would grow with the predicted value of `E[y|X]` (either linearly
for Poisson or quadratically for Gamma).
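+
+As a minimal sketch of how such a residuals-versus-predicted plot can be
+produced (the dataset and estimator below are arbitrary choices made for
+illustration)::
+
+  >>> from sklearn.datasets import load_diabetes
+  >>> from sklearn.linear_model import Ridge
+  >>> from sklearn.metrics import PredictionErrorDisplay
+  >>> X, y = load_diabetes(return_X_y=True)
+  >>> ridge = Ridge().fit(X, y)
+  >>> PredictionErrorDisplay.from_estimator(
+  ...     ridge, X, y, kind="residual_vs_predicted")  # doctest: +SKIP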
When fitting a linear least squares regression model (see -:class:`~sklearn.linear_mnodel.LinearRegression` and -:class:`~sklearn.linear_mnodel.Ridge`), we can use this plot to check +:class:`~sklearn.linear_model.LinearRegression` and +:class:`~sklearn.linear_model.Ridge`), we can use this plot to check if some of the `model assumptions `_ are met, in particular that the residuals should be uncorrelated, their diff --git a/doc/modules/multiclass.rst b/doc/modules/multiclass.rst index 70bab7a1075ec..42762690ce8f7 100644 --- a/doc/modules/multiclass.rst +++ b/doc/modules/multiclass.rst @@ -63,8 +63,8 @@ can provide additional strategies beyond what is built-in: - :class:`semi_supervised.LabelSpreading` - :class:`discriminant_analysis.LinearDiscriminantAnalysis` - :class:`svm.LinearSVC` (setting multi_class="crammer_singer") - - :class:`linear_model.LogisticRegression` (setting multi_class="multinomial") - - :class:`linear_model.LogisticRegressionCV` (setting multi_class="multinomial") + - :class:`linear_model.LogisticRegression` (with most solvers) + - :class:`linear_model.LogisticRegressionCV` (with most solvers) - :class:`neural_network.MLPClassifier` - :class:`neighbors.NearestCentroid` - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis` @@ -86,8 +86,8 @@ can provide additional strategies beyond what is built-in: - :class:`ensemble.GradientBoostingClassifier` - :class:`gaussian_process.GaussianProcessClassifier` (setting multi_class = "one_vs_rest") - :class:`svm.LinearSVC` (setting multi_class="ovr") - - :class:`linear_model.LogisticRegression` (setting multi_class="ovr") - - :class:`linear_model.LogisticRegressionCV` (setting multi_class="ovr") + - :class:`linear_model.LogisticRegression` (most solvers) + - :class:`linear_model.LogisticRegressionCV` (most solvers) - :class:`linear_model.SGDClassifier` - :class:`linear_model.Perceptron` - :class:`linear_model.PassiveAggressiveClassifier` @@ -147,35 +147,35 @@ Target format Valid :term:`multiclass` representations for :func:`~sklearn.utils.multiclass.type_of_target` (`y`) are: - - 1d or column vector containing more than two discrete values. An - example of a vector ``y`` for 4 samples: - - >>> import numpy as np - >>> y = np.array(['apple', 'pear', 'apple', 'orange']) - >>> print(y) - ['apple' 'pear' 'apple' 'orange'] - - - Dense or sparse :term:`binary` matrix of shape ``(n_samples, n_classes)`` - with a single sample per row, where each column represents one class. An - example of both a dense and sparse :term:`binary` matrix ``y`` for 4 - samples, where the columns, in order, are apple, orange, and pear: - - >>> import numpy as np - >>> from sklearn.preprocessing import LabelBinarizer - >>> y = np.array(['apple', 'pear', 'apple', 'orange']) - >>> y_dense = LabelBinarizer().fit_transform(y) - >>> print(y_dense) - [[1 0 0] - [0 0 1] - [1 0 0] - [0 1 0]] - >>> from scipy import sparse - >>> y_sparse = sparse.csr_matrix(y_dense) - >>> print(y_sparse) - (0, 0) 1 - (1, 2) 1 - (2, 0) 1 - (3, 1) 1 +- 1d or column vector containing more than two discrete values. An + example of a vector ``y`` for 4 samples: + + >>> import numpy as np + >>> y = np.array(['apple', 'pear', 'apple', 'orange']) + >>> print(y) + ['apple' 'pear' 'apple' 'orange'] + +- Dense or sparse :term:`binary` matrix of shape ``(n_samples, n_classes)`` + with a single sample per row, where each column represents one class. 
An + example of both a dense and sparse :term:`binary` matrix ``y`` for 4 + samples, where the columns, in order, are apple, orange, and pear: + + >>> import numpy as np + >>> from sklearn.preprocessing import LabelBinarizer + >>> y = np.array(['apple', 'pear', 'apple', 'orange']) + >>> y_dense = LabelBinarizer().fit_transform(y) + >>> print(y_dense) + [[1 0 0] + [0 0 1] + [1 0 0] + [0 1 0]] + >>> from scipy import sparse + >>> y_sparse = sparse.csr_matrix(y_dense) + >>> print(y_sparse) + (0, 0) 1 + (1, 2) 1 + (2, 0) 1 + (3, 1) 1 For more information about :class:`~sklearn.preprocessing.LabelBinarizer`, refer to :ref:`preprocessing_targets`. @@ -311,8 +311,7 @@ Below is an example of multiclass learning using Output-Codes:: >>> from sklearn.multiclass import OutputCodeClassifier >>> from sklearn.svm import LinearSVC >>> X, y = datasets.load_iris(return_X_y=True) - >>> clf = OutputCodeClassifier(LinearSVC(random_state=0), - ... code_size=2, random_state=0) + >>> clf = OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0) >>> clf.fit(X, y).predict(X) array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -464,7 +463,7 @@ Note that all classifiers handling multiclass-multioutput (also known as multitask classification) tasks, support the multilabel classification task as a special case. Multitask classification is similar to the multioutput classification task with different model formulations. For more information, -see the relevant estimator documentat +see the relevant estimator documentation. Below is an example of multiclass-multioutput classification: @@ -529,6 +528,37 @@ using data obtained at a certain location. Each sample would be data obtained at one location and both wind speed and direction would be output for each sample. +The following regressors natively support multioutput regression: + + - :class:`cross_decomposition.CCA` + - :class:`tree.DecisionTreeRegressor` + - :class:`dummy.DummyRegressor` + - :class:`linear_model.ElasticNet` + - :class:`tree.ExtraTreeRegressor` + - :class:`ensemble.ExtraTreesRegressor` + - :class:`gaussian_process.GaussianProcessRegressor` + - :class:`neighbors.KNeighborsRegressor` + - :class:`kernel_ridge.KernelRidge` + - :class:`linear_model.Lars` + - :class:`linear_model.Lasso` + - :class:`linear_model.LassoLars` + - :class:`linear_model.LinearRegression` + - :class:`multioutput.MultiOutputRegressor` + - :class:`linear_model.MultiTaskElasticNet` + - :class:`linear_model.MultiTaskElasticNetCV` + - :class:`linear_model.MultiTaskLasso` + - :class:`linear_model.MultiTaskLassoCV` + - :class:`linear_model.OrthogonalMatchingPursuit` + - :class:`cross_decomposition.PLSCanonical` + - :class:`cross_decomposition.PLSRegression` + - :class:`linear_model.RANSACRegressor` + - :class:`neighbors.RadiusNeighborsRegressor` + - :class:`ensemble.RandomForestRegressor` + - :class:`multioutput.RegressorChain` + - :class:`linear_model.Ridge` + - :class:`linear_model.RidgeCV` + - :class:`compose.TransformedTargetRegressor` + Target format ------------- diff --git a/doc/modules/naive_bayes.rst b/doc/modules/naive_bayes.rst index 1cb8aa0d6dedf..05ca928dfae0b 100644 --- a/doc/modules/naive_bayes.rst +++ b/doc/modules/naive_bayes.rst @@ -69,11 +69,15 @@ On the flip side, although naive Bayes is known as a decent classifier, it is known to be a bad estimator, so the probability outputs from ``predict_proba`` are not to be taken too seriously. -.. 
topic:: References: +|details-start| +**References** +|details-split| - * H. Zhang (2004). `The optimality of Naive Bayes. - `_ - Proc. FLAIRS. +* H. Zhang (2004). `The optimality of Naive Bayes. + `_ + Proc. FLAIRS. + +|details-end| .. _gaussian_naive_bayes: @@ -147,8 +151,13 @@ that is particularly suited for imbalanced data sets. Specifically, CNB uses statistics from the *complement* of each class to compute the model's weights. The inventors of CNB show empirically that the parameter estimates for CNB are more stable than those for MNB. Further, CNB regularly outperforms MNB (often -by a considerable margin) on text classification tasks. The procedure for -calculating the weights is as follows: +by a considerable margin) on text classification tasks. + +|details-start| +**Weights calculation** +|details-split| + +The procedure for calculating the weights is as follows: .. math:: @@ -173,12 +182,18 @@ classification rule is: i.e., a document is assigned to the class that is the *poorest* complement match. -.. topic:: References: +|details-end| + +|details-start| +**References** +|details-split| - * Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003). - `Tackling the poor assumptions of naive bayes text classifiers. - `_ - In ICML (Vol. 3, pp. 616-623). +* Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003). + `Tackling the poor assumptions of naive bayes text classifiers. + `_ + In ICML (Vol. 3, pp. 616-623). + +|details-end| .. _bernoulli_naive_bayes: @@ -190,7 +205,7 @@ algorithms for data that is distributed according to multivariate Bernoulli distributions; i.e., there may be multiple features but each one is assumed to be a binary-valued (Bernoulli, boolean) variable. Therefore, this class requires samples to be represented as binary-valued -feature vectors; if handed any other kind of data, a ``BernoulliNB`` instance +feature vectors; if handed any other kind of data, a :class:`BernoulliNB` instance may binarize its input (depending on the ``binarize`` parameter). The decision rule for Bernoulli naive Bayes is based on @@ -205,24 +220,28 @@ that is an indicator for class :math:`y`, where the multinomial variant would simply ignore a non-occurring feature. In the case of text classification, word occurrence vectors (rather than word -count vectors) may be used to train and use this classifier. ``BernoulliNB`` +count vectors) may be used to train and use this classifier. :class:`BernoulliNB` might perform better on some datasets, especially those with shorter documents. It is advisable to evaluate both models, if time permits. -.. topic:: References: +|details-start| +**References** +|details-split| + +* C.D. Manning, P. Raghavan and H. Schütze (2008). Introduction to + Information Retrieval. Cambridge University Press, pp. 234-265. - * C.D. Manning, P. Raghavan and H. Schütze (2008). Introduction to - Information Retrieval. Cambridge University Press, pp. 234-265. +* A. McCallum and K. Nigam (1998). + `A comparison of event models for Naive Bayes text classification. + `_ + Proc. AAAI/ICML-98 Workshop on Learning for Text Categorization, pp. 41-48. - * A. McCallum and K. Nigam (1998). - `A comparison of event models for Naive Bayes text classification. - `_ - Proc. AAAI/ICML-98 Workshop on Learning for Text Categorization, pp. 41-48. +* V. Metsis, I. Androutsopoulos and G. Paliouras (2006). + `Spam filtering with Naive Bayes -- Which Naive Bayes? + `_ + 3rd Conf. on Email and Anti-Spam (CEAS). - * V. Metsis, I. Androutsopoulos and G. Paliouras (2006). 
- `Spam filtering with Naive Bayes -- Which Naive Bayes? - `_ - 3rd Conf. on Email and Anti-Spam (CEAS). +|details-end| .. _categorical_naive_bayes: @@ -239,6 +258,10 @@ For each feature :math:`i` in the training set :math:`X`, of X conditioned on the class y. The index set of the samples is defined as :math:`J = \{ 1, \dots, m \}`, with :math:`m` as the number of samples. +|details-start| +**Probability calculation** +|details-split| + The probability of category :math:`t` in feature :math:`i` given class :math:`c` is estimated as: @@ -253,9 +276,11 @@ to class :math:`c`, :math:`N_{c} = |\{ j \in J\mid y_j = c\}|` is the number of samples with class c, :math:`\alpha` is a smoothing parameter and :math:`n_i` is the number of available categories of feature :math:`i`. -:class:`CategoricalNB` assumes that the sample matrix :math:`X` is encoded -(for instance with the help of :class:`OrdinalEncoder`) such that all -categories for each feature :math:`i` are represented with numbers +|details-end| + +:class:`CategoricalNB` assumes that the sample matrix :math:`X` is encoded (for +instance with the help of :class:`~sklearn.preprocessing.OrdinalEncoder`) such +that all categories for each feature :math:`i` are represented with numbers :math:`0, ..., n_i - 1` where :math:`n_i` is the number of available categories of feature :math:`i`. diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index dfd6791d9a3d3..b081b29572d8a 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -59,12 +59,12 @@ The choice of neighbors search algorithm is controlled through the keyword from the training data. For a discussion of the strengths and weaknesses of each option, see `Nearest Neighbor Algorithms`_. - .. warning:: +.. warning:: - Regarding the Nearest Neighbors algorithms, if two - neighbors :math:`k+1` and :math:`k` have identical distances - but different labels, the result will depend on the ordering of the - training data. + Regarding the Nearest Neighbors algorithms, if two + neighbors :math:`k+1` and :math:`k` have identical distances + but different labels, the result will depend on the ordering of the + training data. Finding the Nearest Neighbors ----------------------------- @@ -136,9 +136,13 @@ have the same interface; we'll show an example of using the KD Tree here: Refer to the :class:`KDTree` and :class:`BallTree` class documentation for more information on the options available for nearest neighbors searches, including specification of query strategies, distance metrics, etc. For a list -of available metrics, see the documentation of the :class:`DistanceMetric` class -and the metrics listed in `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. -Note that the "cosine" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. +of valid metrics use `KDTree.valid_metrics` and `BallTree.valid_metrics`: + + >>> from sklearn.neighbors import KDTree, BallTree + >>> KDTree.valid_metrics + ['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity'] + >>> BallTree.valid_metrics + ['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity', 'seuclidean', 'mahalanobis', 'hamming', 'canberra', 'braycurtis', 'jaccard', 'dice', 'rogerstanimoto', 'russellrao', 'sokalmichener', 'sokalsneath', 'haversine', 'pyfunc'] .. _classification: @@ -184,13 +188,9 @@ distance can be supplied to compute the weights. .. 
|classification_1| image:: ../auto_examples/neighbors/images/sphx_glr_plot_classification_001.png :target: ../auto_examples/neighbors/plot_classification.html - :scale: 50 - -.. |classification_2| image:: ../auto_examples/neighbors/images/sphx_glr_plot_classification_002.png - :target: ../auto_examples/neighbors/plot_classification.html - :scale: 50 + :scale: 75 -.. centered:: |classification_1| |classification_2| +.. centered:: |classification_1| .. topic:: Examples: @@ -304,13 +304,15 @@ In scikit-learn, KD tree neighbors searches are specified using the keyword ``algorithm = 'kd_tree'``, and are computed using the class :class:`KDTree`. - -.. topic:: References: +|details-start| +**References** +|details-split| * `"Multidimensional binary search trees used for associative searching" `_, Bentley, J.L., Communications of the ACM (1975) +|details-end| .. _ball_tree: @@ -343,15 +345,21 @@ neighbors searches are specified using the keyword ``algorithm = 'ball_tree'``, and are computed using the class :class:`BallTree`. Alternatively, the user can work with the :class:`BallTree` class directly. -.. topic:: References: +|details-start| +**References** +|details-split| * `"Five Balltree Construction Algorithms" `_, Omohundro, S.M., International Computer Science Institute Technical Report (1989) -Choice of Nearest Neighbors Algorithm -------------------------------------- +|details-end| + +|details-start| +**Choice of Nearest Neighbors Algorithm** +|details-split| + The optimal algorithm for a given dataset is a complicated choice, and depends on a number of factors: @@ -436,8 +444,12 @@ based on the following assumptions: * when :math:`D > 15`, the intrinsic dimensionality of the data is generally too high for tree-based methods -Effect of ``leaf_size`` ------------------------ +|details-end| + +|details-start| +**Effect of ``leaf_size``** +|details-split| + As noted above, for small sample sizes a brute force search can be more efficient than a tree-based query. This fact is accounted for in the ball tree and KD tree by internally switching to brute force searches within @@ -464,13 +476,16 @@ leaf nodes. The level of this switch can be specified with the parameter the size of the training set. ``leaf_size`` is not referenced for brute force queries. +|details-end| -Valid Metrics for Nearest Neighbor Algorithms ---------------------------------------------- +|details-start| +**Valid Metrics for Nearest Neighbor Algorithms** +|details-split| -For a list of available metrics, see the documentation of the :class:`DistanceMetric` -class and the metrics listed in `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. -Note that the "cosine" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. +For a list of available metrics, see the documentation of the +:class:`~sklearn.metrics.DistanceMetric` class and the metrics listed in +`sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the "cosine" +metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. A list of valid metrics for any of the above algorithms can be obtained by using their ``valid_metric`` attribute. For example, valid metrics for ``KDTree`` can be generated by: @@ -479,6 +494,7 @@ A list of valid metrics for any of the above algorithms can be obtained by using >>> print(sorted(KDTree.valid_metrics)) ['chebyshev', 'cityblock', 'euclidean', 'infinity', 'l1', 'l2', 'manhattan', 'minkowski', 'p'] +|details-end| .. 
_nearest_centroid_classifier: @@ -790,9 +806,9 @@ space: p_{i j} = \frac{\exp(-||L x_i - L x_j||^2)}{\sum\limits_{k \ne i} {\exp{-(||L x_i - L x_k||^2)}}} , \quad p_{i i} = 0 - -Mahalanobis distance -^^^^^^^^^^^^^^^^^^^^ +|details-start| +**Mahalanobis distance** +|details-split| NCA can be seen as learning a (squared) Mahalanobis distance metric: @@ -803,6 +819,7 @@ NCA can be seen as learning a (squared) Mahalanobis distance metric: where :math:`M = L^T L` is a symmetric positive semi-definite matrix of size ``(n_features, n_features)``. +|details-end| Implementation -------------- @@ -843,3 +860,5 @@ added space complexity in the operation. `Wikipedia entry on Neighborhood Components Analysis `_ + +|details-end| diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst index 35b7ffd60b5d1..7ee2387068c81 100644 --- a/doc/modules/neural_networks_supervised.rst +++ b/doc/modules/neural_networks_supervised.rst @@ -20,7 +20,7 @@ Multi-layer Perceptron ====================== **Multi-layer Perceptron (MLP)** is a supervised learning algorithm that learns -a function :math:`f(\cdot): R^m \rightarrow R^o` by training on a dataset, +a function :math:`f: R^m \rightarrow R^o` by training on a dataset, where :math:`m` is the number of dimensions for input and :math:`o` is the number of dimensions for output. Given a set of features :math:`X = {x_1, x_2, ..., x_m}` and a target :math:`y`, it can learn a non-linear function approximator for either @@ -49,28 +49,33 @@ The module contains the public attributes ``coefs_`` and ``intercepts_``. :math:`i+1`. ``intercepts_`` is a list of bias vectors, where the vector at index :math:`i` represents the bias values added to layer :math:`i+1`. +|details-start| +**Advantages and disadvantages of Multi-layer Perceptron** +|details-split| + The advantages of Multi-layer Perceptron are: - + Capability to learn non-linear models. ++ Capability to learn non-linear models. - + Capability to learn models in real-time (on-line learning) - using ``partial_fit``. ++ Capability to learn models in real-time (on-line learning) + using ``partial_fit``. The disadvantages of Multi-layer Perceptron (MLP) include: - + MLP with hidden layers have a non-convex loss function where there exists - more than one local minimum. Therefore different random weight - initializations can lead to different validation accuracy. ++ MLP with hidden layers have a non-convex loss function where there exists + more than one local minimum. Therefore different random weight + initializations can lead to different validation accuracy. - + MLP requires tuning a number of hyperparameters such as the number of - hidden neurons, layers, and iterations. ++ MLP requires tuning a number of hyperparameters such as the number of + hidden neurons, layers, and iterations. - + MLP is sensitive to feature scaling. ++ MLP is sensitive to feature scaling. Please see :ref:`Tips on Practical Use ` section that addresses some of these disadvantages. +|details-end| Classification ============== @@ -146,7 +151,8 @@ See the examples below and the docstring of .. topic:: Examples: * :ref:`sphx_glr_auto_examples_neural_networks_plot_mlp_training_curves.py` - * :ref:`sphx_glr_auto_examples_neural_networks_plot_mnist_filters.py` + * See :ref:`sphx_glr_auto_examples_neural_networks_plot_mnist_filters.py` for + visualized representation of trained weights. Regression ========== @@ -199,7 +205,7 @@ the parameter space search. :math:`Loss` is the loss function used for the network. 
More details can be found in the documentation of -`SGD `_ +`SGD `_ Adam is similar to SGD in a sense that it is a stochastic optimizer, but it can automatically adjust the amount to update parameters based on adaptive estimates @@ -223,14 +229,14 @@ Complexity Suppose there are :math:`n` training samples, :math:`m` features, :math:`k` hidden layers, each containing :math:`h` neurons - for simplicity, and :math:`o` output neurons. The time complexity of backpropagation is -:math:`O(n\cdot m \cdot h^k \cdot o \cdot i)`, where :math:`i` is the number +:math:`O(i \cdot n \cdot (m \cdot h + (k - 1) \cdot h \cdot h + h \cdot o))`, where :math:`i` is the number of iterations. Since backpropagation has a high time complexity, it is advisable to start with smaller number of hidden neurons and few hidden layers for training. - +|details-start| Mathematical formulation -======================== +|details-split| Given a set of training examples :math:`(x_1, y_1), (x_2, y_2), \ldots, (x_n, y_n)` where :math:`x_i \in \mathbf{R}^n` and :math:`y_i \in \{0, 1\}`, a one hidden @@ -304,41 +310,42 @@ with a value larger than 0. The algorithm stops when it reaches a preset maximum number of iterations; or when the improvement in loss is below a certain, small number. - +|details-end| .. _mlp_tips: Tips on Practical Use ===================== - * Multi-layer Perceptron is sensitive to feature scaling, so it - is highly recommended to scale your data. For example, scale each - attribute on the input vector X to [0, 1] or [-1, +1], or standardize - it to have mean 0 and variance 1. Note that you must apply the *same* - scaling to the test set for meaningful results. - You can use :class:`StandardScaler` for standardization. - - >>> from sklearn.preprocessing import StandardScaler # doctest: +SKIP - >>> scaler = StandardScaler() # doctest: +SKIP - >>> # Don't cheat - fit only on training data - >>> scaler.fit(X_train) # doctest: +SKIP - >>> X_train = scaler.transform(X_train) # doctest: +SKIP - >>> # apply same transformation to test data - >>> X_test = scaler.transform(X_test) # doctest: +SKIP - - An alternative and recommended approach is to use :class:`StandardScaler` - in a :class:`Pipeline` - - * Finding a reasonable regularization parameter :math:`\alpha` is - best done using :class:`GridSearchCV`, usually in the - range ``10.0 ** -np.arange(1, 7)``. - - * Empirically, we observed that `L-BFGS` converges faster and - with better solutions on small datasets. For relatively large - datasets, however, `Adam` is very robust. It usually converges - quickly and gives pretty good performance. `SGD` with momentum or - nesterov's momentum, on the other hand, can perform better than - those two algorithms if learning rate is correctly tuned. +* Multi-layer Perceptron is sensitive to feature scaling, so it + is highly recommended to scale your data. For example, scale each + attribute on the input vector X to [0, 1] or [-1, +1], or standardize + it to have mean 0 and variance 1. Note that you must apply the *same* + scaling to the test set for meaningful results. + You can use :class:`~sklearn.preprocessing.StandardScaler` for standardization. 
+ + >>> from sklearn.preprocessing import StandardScaler # doctest: +SKIP + >>> scaler = StandardScaler() # doctest: +SKIP + >>> # Don't cheat - fit only on training data + >>> scaler.fit(X_train) # doctest: +SKIP + >>> X_train = scaler.transform(X_train) # doctest: +SKIP + >>> # apply same transformation to test data + >>> X_test = scaler.transform(X_test) # doctest: +SKIP + + An alternative and recommended approach is to use + :class:`~sklearn.preprocessing.StandardScaler` in a + :class:`~sklearn.pipeline.Pipeline` + +* Finding a reasonable regularization parameter :math:`\alpha` is best done + using :class:`~sklearn.model_selection.GridSearchCV`, usually in the range + ``10.0 ** -np.arange(1, 7)``. + +* Empirically, we observed that `L-BFGS` converges faster and + with better solutions on small datasets. For relatively large + datasets, however, `Adam` is very robust. It usually converges + quickly and gives pretty good performance. `SGD` with momentum or + nesterov's momentum, on the other hand, can perform better than + those two algorithms if learning rate is correctly tuned. More control with warm_start ============================ @@ -354,7 +361,9 @@ or want to do additional monitoring, using ``warm_start=True`` and ... # additional monitoring / inspection MLPClassifier(... -.. topic:: References: +|details-start| +**References** +|details-split| * `"Learning representations by back-propagating errors." `_ @@ -372,3 +381,5 @@ or want to do additional monitoring, using ``warm_start=True`` and * :arxiv:`"Adam: A method for stochastic optimization." <1412.6980>` Kingma, Diederik, and Jimmy Ba (2014) + +|details-end| diff --git a/doc/modules/outlier_detection.rst b/doc/modules/outlier_detection.rst index 29ca263118bae..d003b645eb19c 100644 --- a/doc/modules/outlier_detection.rst +++ b/doc/modules/outlier_detection.rst @@ -382,7 +382,7 @@ This strategy is illustrated below. * Breunig, Kriegel, Ng, and Sander (2000) `LOF: identifying density-based local outliers. - `_ + `_ Proc. ACM SIGMOD .. _novelty_with_lof: @@ -411,7 +411,7 @@ Note that ``fit_predict`` is not available in this case to avoid inconsistencies Novelty detection with Local Outlier Factor is illustrated below. - .. figure:: ../auto_examples/neighbors/images/sphx_glr_plot_lof_novelty_detection_001.png - :target: ../auto_examples/neighbors/plot_lof_novelty_detection.html - :align: center - :scale: 75% +.. figure:: ../auto_examples/neighbors/images/sphx_glr_plot_lof_novelty_detection_001.png + :target: ../auto_examples/neighbors/plot_lof_novelty_detection.html + :align: center + :scale: 75% diff --git a/doc/modules/partial_dependence.rst b/doc/modules/partial_dependence.rst index 92a44c0640f98..94f7206140b90 100644 --- a/doc/modules/partial_dependence.rst +++ b/doc/modules/partial_dependence.rst @@ -33,7 +33,7 @@ The figure below shows two one-way and one two-way partial dependence plots for the bike sharing dataset, with a :class:`~sklearn.ensemble.HistGradientBoostingRegressor`: -.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_partial_dependence_005.png +.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_partial_dependence_006.png :target: ../auto_examples/inspection/plot_partial_dependence.html :align: center :scale: 70 @@ -79,6 +79,10 @@ parameter takes a list of indices, names of the categorical features or a boolea mask. The graphical representation of partial dependence for categorical features is a bar plot or a 2D heatmap. 
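+
+As a rough sketch of this usage (``hist_est`` and the dataframe ``X_df`` with
+its ``"season"`` column are hypothetical placeholders, not objects defined in
+this guide)::
+
+  >>> from sklearn.inspection import PartialDependenceDisplay  # doctest: +SKIP
+  >>> # `hist_est` is a fitted estimator and `X_df` a dataframe whose "season"
+  >>> # column is categorical; both are hypothetical placeholders.
+  >>> PartialDependenceDisplay.from_estimator(
+  ...     hist_est, X_df, features=["season"],
+  ...     categorical_features=["season"])  # doctest: +SKIP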
+|details-start| +**PDPs for multi-class classification** +|details-split| + For multi-class classification, you need to set the class label for which the PDPs should be created via the ``target`` argument:: @@ -93,6 +97,8 @@ the PDPs should be created via the ``target`` argument:: The same parameter ``target`` is used to specify the target in multi-output regression settings. +|details-end| + If you need the raw values of the partial dependence function rather than the plots, you can use the :func:`sklearn.inspection.partial_dependence` function:: @@ -102,7 +108,7 @@ the plots, you can use the >>> results = partial_dependence(clf, X, [0]) >>> results["average"] array([[ 2.466..., 2.466..., ... - >>> results["values"] + >>> results["grid_values"] [array([-1.624..., -1.592..., ... The values at which the partial dependence should be evaluated are directly diff --git a/doc/modules/permutation_importance.rst b/doc/modules/permutation_importance.rst index f2530aac3a388..368c6a6409aa0 100644 --- a/doc/modules/permutation_importance.rst +++ b/doc/modules/permutation_importance.rst @@ -6,15 +6,45 @@ Permutation feature importance .. currentmodule:: sklearn.inspection -Permutation feature importance is a model inspection technique that can be used -for any :term:`fitted` :term:`estimator` when the data is tabular. This is -especially useful for non-linear or opaque :term:`estimators`. The permutation -feature importance is defined to be the decrease in a model score when a single -feature value is randomly shuffled [1]_. This procedure breaks the relationship -between the feature and the target, thus the drop in the model score is -indicative of how much the model depends on the feature. This technique -benefits from being model agnostic and can be calculated many times with -different permutations of the feature. +Permutation feature importance is a model inspection technique that measures the +contribution of each feature to a :term:`fitted` model's statistical performance +on a given tabular dataset. This technique is particularly useful for non-linear +or opaque :term:`estimators`, and involves randomly shuffling the values of a +single feature and observing the resulting degradation of the model's score +[1]_. By breaking the relationship between the feature and the target, we +determine how much the model relies on such particular feature. + +In the following figures, we observe the effect of permuting features on the correlation +between the feature and the target and consequently on the model statistical +performance. + +.. image:: ../images/permuted_predictive_feature.png + :align: center + +.. image:: ../images/permuted_non_predictive_feature.png + :align: center + +On the top figure, we observe that permuting a predictive feature breaks the +correlation between the feature and the target, and consequently the model +statistical performance decreases. On the bottom figure, we observe that permuting +a non-predictive feature does not significantly degrade the model statistical performance. + +One key advantage of permutation feature importance is that it is +model-agnostic, i.e. it can be applied to any fitted estimator. Moreover, it can +be calculated multiple times with different permutations of the feature, further +providing a measure of the variance in the estimated feature importances for the +specific trained model. 
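+
+As a minimal sketch of this on synthetic data (the dataset and estimator are
+arbitrary choices made for illustration), repeating the permutation via
+`n_repeats` yields both a mean importance and its spread for each feature::
+
+  >>> from sklearn.datasets import make_regression
+  >>> from sklearn.inspection import permutation_importance
+  >>> from sklearn.linear_model import Ridge
+  >>> X, y = make_regression(n_samples=100, n_features=3, random_state=0)
+  >>> model = Ridge().fit(X, y)
+  >>> result = permutation_importance(model, X, y, n_repeats=10, random_state=0)
+  >>> result.importances_mean.shape, result.importances_std.shape
+  ((3,), (3,))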
+
+The figure below shows the permutation feature importance of a
+:class:`~sklearn.ensemble.RandomForestClassifier` trained on an augmented
+version of the titanic dataset that contains `random_cat` and `random_num`
+features, i.e. a categorical and a numerical feature that are not correlated in
+any way with the target variable:
+
+.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_permutation_importance_002.png
+   :target: ../auto_examples/inspection/plot_permutation_importance.html
+   :align: center
+   :scale: 70

.. warning::

@@ -74,15 +104,18 @@ highlight which features contribute the most to the generalization power of the
inspected model. Features that are important on the training set but not on the
held-out set might cause the model to overfit.

-The permutation feature importance is the decrease in a model score when a single
-feature value is randomly shuffled. The score function to be used for the
-computation of importances can be specified with the `scoring` argument,
-which also accepts multiple scorers. Using multiple scorers is more computationally
-efficient than sequentially calling :func:`permutation_importance` several times
-with a different scorer, as it reuses model predictions.
+The permutation feature importance depends on the score function that is
+specified with the `scoring` argument. This argument accepts multiple scorers,
+which is more computationally efficient than sequentially calling
+:func:`permutation_importance` several times with a different scorer, as it
+reuses model predictions.

-An example of using multiple scorers is shown below, employing a list of metrics,
-but more input formats are possible, as documented in :ref:`multimetric_scoring`.
+|details-start|
+**Example of permutation feature importance using multiple scorers**
+|details-split|
+
+In the example below we use a list of metrics, but more input formats are
+possible, as documented in :ref:`multimetric_scoring`.

  >>> scoring = ['r2', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error']
  >>> r_multi = permutation_importance(
@@ -116,7 +149,9 @@ The ranking of the features is approximately the same for different metrics
even if the scales of the importance values are very different. However, this
is not guaranteed and different metrics might lead to significantly different
feature importances, in particular for models trained for imbalanced
classification problems,
-for which the choice of the classification metric can be critical.
+for which **the choice of the classification metric can be critical**.
+
+|details-end|

Outline of the permutation importance algorithm
-----------------------------------------------
@@ -156,9 +191,9 @@ over low cardinality features such as binary features or categorical
variables with a small number of possible categories.

Permutation-based feature importances do not exhibit such a bias. Additionally,
-the permutation feature importance may be computed performance metric on the
-model predictions and can be used to analyze any model class (not
-just tree-based models).
+the permutation feature importance may be computed with any performance metric
+on the model predictions and can be used to analyze any model class (not just
+tree-based models).
The following example highlights the limitations of impurity-based feature importance in contrast to permutation-based feature importance: @@ -168,13 +203,29 @@ Misleading values on strongly correlated features ------------------------------------------------- When two features are correlated and one of the features is permuted, the model -will still have access to the feature through its correlated feature. This will -result in a lower importance value for both features, where they might -*actually* be important. +still has access to the latter through its correlated feature. This results in a +lower reported importance value for both features, though they might *actually* +be important. + +The figure below shows the permutation feature importance of a +:class:`~sklearn.ensemble.RandomForestClassifier` trained using the +:ref:`breast_cancer_dataset`, which contains strongly correlated features. A +naive interpretation would suggest that all features are unimportant: + +.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_permutation_importance_multicollinear_002.png + :target: ../auto_examples/inspection/plot_permutation_importance_multicollinear.html + :align: center + :scale: 70 + +One way to handle the issue is to cluster features that are correlated and only +keep one feature from each cluster. + +.. figure:: ../auto_examples/inspection/images/sphx_glr_plot_permutation_importance_multicollinear_004.png + :target: ../auto_examples/inspection/plot_permutation_importance_multicollinear.html + :align: center + :scale: 70 -One way to handle this is to cluster features that are correlated and only -keep one feature from each cluster. This strategy is explored in the following -example: +For more details on such strategy, see the example :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance_multicollinear.py`. .. topic:: Examples: diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index 9c2af6424a298..99678f2b3e45b 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -10,9 +10,10 @@ The ``sklearn.preprocessing`` package provides several common utility functions and transformer classes to change raw feature vectors into a representation that is more suitable for the downstream estimators. -In general, learning algorithms benefit from standardization of the data set. If -some outliers are present in the set, robust scalers or transformers are more -appropriate. The behaviors of the different scalers, transformers, and +In general, many learning algorithms such as linear models benefit from standardization of the data set +(see :ref:`sphx_glr_auto_examples_preprocessing_plot_scaling_importance.py`). +If some outliers are present in the set, robust scalers or other transformers can +be more appropriate. The behaviors of the different scalers, transformers, and normalizers on a dataset containing marginal outliers is highlighted in :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. @@ -218,21 +219,28 @@ of the data is likely to not work very well. In these cases, you can use :class:`RobustScaler` as a drop-in replacement instead. It uses more robust estimates for the center and range of your data. +|details-start| +**References** +|details-split| -.. topic:: References: +Further discussion on the importance of centering and scaling data is +available on this FAQ: `Should I normalize/standardize/rescale the data? 
+`_ - Further discussion on the importance of centering and scaling data is - available on this FAQ: `Should I normalize/standardize/rescale the data? - `_ +|details-end| -.. topic:: Scaling vs Whitening +|details-start| +**Scaling vs Whitening** +|details-split| - It is sometimes not enough to center and scale the features - independently, since a downstream model can further make some assumption - on the linear independence of the features. +It is sometimes not enough to center and scale the features +independently, since a downstream model can further make some assumption +on the linear independence of the features. - To address this issue you can use :class:`~sklearn.decomposition.PCA` with - ``whiten=True`` to further remove the linear correlation across features. +To address this issue you can use :class:`~sklearn.decomposition.PCA` with +``whiten=True`` to further remove the linear correlation across features. + +|details-end| .. _kernel_centering: @@ -247,7 +255,9 @@ followed by the removal of the mean in that space. In other words, :class:`KernelCenterer` computes the centered Gram matrix associated to a positive semidefinite kernel :math:`K`. +|details-start| **Mathematical formulation** +|details-split| We can have a look at the mathematical formulation now that we have the intuition. Let :math:`K` be a kernel matrix of shape `(n_samples, n_samples)` @@ -300,6 +310,8 @@ centering :math:`K_{test}` is done as: `_ Neural computation 10.5 (1998): 1299-1319. +|details-end| + .. _preprocessing_transformer: Non-linear transformation @@ -371,7 +383,9 @@ possible in order to stabilize variance and minimize skewness. :class:`PowerTransformer` currently provides two such power transformations, the Yeo-Johnson transform and the Box-Cox transform. -The Yeo-Johnson transform is given by: +|details-start| +**Yeo-Johnson transform** +|details-split| .. math:: x_i^{(\lambda)} = @@ -382,7 +396,11 @@ The Yeo-Johnson transform is given by: - \ln (- x_i + 1) & \text{if } \lambda = 2, x_i < 0 \end{cases} -while the Box-Cox transform is given by: +|details-end| + +|details-start| +**Box-Cox transform** +|details-split| .. math:: x_i^{(\lambda)} = @@ -412,6 +430,8 @@ While the above example sets the `standardize` option to `False`, :class:`PowerTransformer` will apply zero-mean, unit-variance normalization to the transformed output by default. +|details-end| + Below are examples of Box-Cox and Yeo-Johnson applied to various probability distributions. Note that when applied to certain distributions, the power transforms achieve very Gaussian-like results, but with others, they are @@ -498,8 +518,9 @@ The normalizer instance can then be used on sample vectors as any transformer:: Note: L2 normalization is also known as spatial sign preprocessing. -.. topic:: Sparse input - +|details-start| +**Sparse input** +|details-split| :func:`normalize` and :class:`Normalizer` accept **both dense array-like and sparse matrices from scipy.sparse as input**. @@ -508,6 +529,8 @@ Note: L2 normalization is also known as spatial sign preprocessing. efficient Cython routines. To avoid unnecessary memory copies, it is recommended to choose the CSR representation upstream. +|details-end| + .. _preprocessing_categorical_features: Encoding categorical features @@ -685,7 +708,7 @@ be encoded as all zeros:: All the categories in `X_test` are unknown during transform and will be mapped to all zeros. This means that unknown categories will have the same mapping as -the dropped category. 
:meth`OneHotEncoder.inverse_transform` will map all zeros
+the dropped category. :meth:`OneHotEncoder.inverse_transform` will map all zeros
to the dropped category if a category is dropped and `None` if a category is not dropped::
@@ -698,6 +721,10 @@ not dropped::
>>> drop_enc.inverse_transform(X_trans) array([['female', None, None]], dtype=object)
+|details-start|
+**Support of categorical features with missing values**
+|details-split|
+
:class:`OneHotEncoder` supports categorical features with missing values by considering the missing values as an additional category::
@@ -729,14 +756,17 @@ separate categories::
See :ref:`dict_feature_extraction` for categorical features that are represented as a dict, not as scalars.
-.. _one_hot_encoder_infrequent_categories:
+|details-end|
+
+.. _encoder_infrequent_categories:
Infrequent categories
---------------------
-:class:`OneHotEncoder` supports aggregating infrequent categories into a single
-output for each feature. The parameters to enable the gathering of infrequent
-categories are `min_frequency` and `max_categories`.
+:class:`OneHotEncoder` and :class:`OrdinalEncoder` support aggregating
+infrequent categories into a single output for each feature. The parameters to
+enable the gathering of infrequent categories are `min_frequency` and
+`max_categories`.
1. `min_frequency` is either an integer greater or equal to 1, or a float in the interval `(0.0, 1.0)`. If `min_frequency` is an integer, categories with
@@ -750,11 +780,47 @@ categories are `min_frequency` and `max_categories`.
input feature. `max_categories` includes the feature that combines infrequent categories.
-In the following example, the categories, `'dog', 'snake'` are considered
-infrequent::
+In the following example with :class:`OrdinalEncoder`, the categories `'dog'` and
+`'snake'` are considered infrequent::
>>> X = np.array([['dog'] * 5 + ['cat'] * 20 + ['rabbit'] * 10 + ... ['snake'] * 3], dtype=object).T
+ >>> enc = preprocessing.OrdinalEncoder(min_frequency=6).fit(X)
+ >>> enc.infrequent_categories_
+ [array(['dog', 'snake'], dtype=object)]
+ >>> enc.transform(np.array([['dog'], ['cat'], ['rabbit'], ['snake']]))
+ array([[2.],
+ [0.],
+ [1.],
+ [2.]])
+
+:class:`OrdinalEncoder`'s `max_categories` does **not** take into account missing
+or unknown categories. Setting `unknown_value` or `encoded_missing_value` to an
+integer will increase the number of unique integer codes by one each. This can
+result in up to `max_categories + 2` integer codes. In the following example,
+"a" and "d" are considered infrequent and grouped together into a single
+category, "b" and "c" are their own categories, unknown values are encoded as 3,
+and missing values are encoded as 4.
+
+ >>> X_train = np.array(
+ ... [["a"] * 5 + ["b"] * 20 + ["c"] * 10 + ["d"] * 3 + [np.nan]],
+ ... dtype=object).T
+ >>> enc = preprocessing.OrdinalEncoder(
+ ... handle_unknown="use_encoded_value", unknown_value=3,
+ ... max_categories=3, encoded_missing_value=4)
+ >>> _ = enc.fit(X_train)
+ >>> X_test = np.array([["a"], ["b"], ["c"], ["d"], ["e"], [np.nan]], dtype=object)
+ >>> enc.transform(X_test)
+ array([[2.],
+ [0.],
+ [1.],
+ [2.],
+ [3.],
+ [4.]])
+
+Similarly, :class:`OneHotEncoder` can be configured to group together infrequent
+categories::
+
>>> enc = preprocessing.OneHotEncoder(min_frequency=6, sparse_output=False).fit(X)
>>> enc.infrequent_categories_
[array(['dog', 'snake'], dtype=object)]
@@ -830,6 +896,131 @@ lexicon order.
>>> enc.infrequent_categories_
[array(['b', 'c'], dtype=object)]
+..
_target_encoder:
+
+Target Encoder
+--------------
+
+.. currentmodule:: sklearn.preprocessing
+
+The :class:`TargetEncoder` uses the target mean conditioned on the categorical
+feature for encoding unordered categories, i.e. nominal categories [PAR]_
+[MIC]_. This encoding scheme is useful for categorical features with high
+cardinality, where one-hot encoding would inflate the feature space, making it
+more expensive for a downstream model to process. Classical examples of high
+cardinality categories are location-based features such as zip code or region.
+
+|details-start|
+**Binary classification targets**
+|details-split|
+
+For the binary classification target, the target encoding is given by:
+
+.. math::
+ S_i = \lambda_i\frac{n_{iY}}{n_i} + (1 - \lambda_i)\frac{n_Y}{n}
+
+where :math:`S_i` is the encoding for category :math:`i`, :math:`n_{iY}` is the
+number of observations with :math:`Y=1` and category :math:`i`, :math:`n_i` is
+the number of observations with category :math:`i`, :math:`n_Y` is the number of
+observations with :math:`Y=1`, :math:`n` is the number of observations, and
+:math:`\lambda_i` is a shrinkage factor for category :math:`i`. The shrinkage
+factor is given by:
+
+.. math::
+ \lambda_i = \frac{n_i}{m + n_i}
+
+where :math:`m` is a smoothing factor, which is controlled with the `smooth`
+parameter in :class:`TargetEncoder`. Large smoothing factors will put more
+weight on the global mean. When `smooth="auto"`, the smoothing factor is
+computed as an empirical Bayes estimate: :math:`m=\sigma_i^2/\tau^2`, where
+:math:`\sigma_i^2` is the variance of `y` with category :math:`i` and
+:math:`\tau^2` is the global variance of `y`.
+
+|details-end|
+
+|details-start|
+**Multiclass classification targets**
+|details-split|
+
+For multiclass classification targets, the formulation is similar to binary
+classification:
+
+.. math::
+ S_{ij} = \lambda_i\frac{n_{iY_j}}{n_i} + (1 - \lambda_i)\frac{n_{Y_j}}{n}
+
+where :math:`S_{ij}` is the encoding for category :math:`i` and class :math:`j`,
+:math:`n_{iY_j}` is the number of observations with :math:`Y=j` and category
+:math:`i`, :math:`n_i` is the number of observations with category :math:`i`,
+:math:`n_{Y_j}` is the number of observations with :math:`Y=j`, :math:`n` is the
+number of observations, and :math:`\lambda_i` is a shrinkage factor for category
+:math:`i`.
+
+|details-end|
+
+|details-start|
+**Continuous targets**
+|details-split|
+
+For continuous targets, the formulation is similar to binary classification:
+
+.. math::
+ S_i = \lambda_i\frac{\sum_{k\in L_i}Y_k}{n_i} + (1 - \lambda_i)\frac{\sum_{k=1}^{n}Y_k}{n}
+
+where :math:`L_i` is the set of observations with category :math:`i` and
+:math:`n_i` is the number of observations with category :math:`i`.
+
+|details-end|
+
+:meth:`~TargetEncoder.fit_transform` internally relies on a :term:`cross fitting`
+scheme to prevent target information from leaking into the train-time
+representation, especially for non-informative high-cardinality categorical
+variables, and to help prevent the downstream model from overfitting spurious
+correlations. Note that as a result, `fit(X, y).transform(X)` does not equal
+`fit_transform(X, y)`. In :meth:`~TargetEncoder.fit_transform`, the training
+data is split into *k* folds (determined by the `cv` parameter) and each fold is
+encoded using the encodings learnt with the other *k-1* folds. The following
+diagram shows the :term:`cross fitting` scheme in
+:meth:`~TargetEncoder.fit_transform` with the default `cv=5`:
+
+..
image:: ../images/target_encoder_cross_validation.svg + :width: 600 + :align: center + +:meth:`~TargetEncoder.fit_transform` also learns a 'full data' encoding using +the whole training set. This is never used in +:meth:`~TargetEncoder.fit_transform` but is saved to the attribute `encodings_`, +for use when :meth:`~TargetEncoder.transform` is called. Note that the encodings +learned for each fold during the :term:`cross fitting` scheme are not saved to +an attribute. + +The :meth:`~TargetEncoder.fit` method does **not** use any :term:`cross fitting` +schemes and learns one encoding on the entire training set, which is used to +encode categories in :meth:`~TargetEncoder.transform`. +This encoding is the same as the 'full data' +encoding learned in :meth:`~TargetEncoder.fit_transform`. + +.. note:: + :class:`TargetEncoder` considers missing values, such as `np.nan` or `None`, + as another category and encodes them like any other category. Categories + that are not seen during `fit` are encoded with the target mean, i.e. + `target_mean_`. + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py` + * :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder_cross_val.py` + +.. topic:: References + + .. [MIC] :doi:`Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality + categorical attributes in classification and prediction problems" + SIGKDD Explor. Newsl. 3, 1 (July 2001), 27–32. <10.1145/507533.507538>` + + .. [PAR] :doi:`Pargent, F., Pfisterer, F., Thomas, J. et al. "Regularized target + encoding outperforms traditional methods in supervised machine learning with + high cardinality features" Comput Stat 37, 2671–2692 (2022) + <10.1007/s00180-022-01207-6>` + .. _preprocessing_discretization: Discretization @@ -864,9 +1055,9 @@ For each feature, the bin edges are computed during ``fit`` and together with the number of bins, they will define the intervals. Therefore, for the current example, these intervals are defined as: - - feature 1: :math:`{[-\infty, -1), [-1, 2), [2, \infty)}` - - feature 2: :math:`{[-\infty, 5), [5, \infty)}` - - feature 3: :math:`{[-\infty, 14), [14, \infty)}` +- feature 1: :math:`{[-\infty, -1), [-1, 2), [2, \infty)}` +- feature 2: :math:`{[-\infty, 5), [5, \infty)}` +- feature 3: :math:`{[-\infty, 14), [14, \infty)}` Based on these bin intervals, ``X`` is transformed as follows:: @@ -894,6 +1085,8 @@ For instance, we can use the Pandas function :func:`pandas.cut`:: >>> import pandas as pd >>> import numpy as np + >>> from sklearn import preprocessing + >>> >>> bins = [0, 1, 13, 20, 60, np.inf] >>> labels = ['infant', 'kid', 'teen', 'adult', 'senior citizen'] >>> transformer = preprocessing.FunctionTransformer( @@ -1055,23 +1248,23 @@ below. Some of the advantages of splines over polynomials are: - - B-splines are very flexible and robust if you keep a fixed low degree, - usually 3, and parsimoniously adapt the number of knots. Polynomials - would need a higher degree, which leads to the next point. - - B-splines do not have oscillatory behaviour at the boundaries as have - polynomials (the higher the degree, the worse). This is known as `Runge's - phenomenon `_. - - B-splines provide good options for extrapolation beyond the boundaries, - i.e. beyond the range of fitted values. Have a look at the option - ``extrapolation``. - - B-splines generate a feature matrix with a banded structure. 
For a single - feature, every row contains only ``degree + 1`` non-zero elements, which - occur consecutively and are even positive. This results in a matrix with - good numerical properties, e.g. a low condition number, in sharp contrast - to a matrix of polynomials, which goes under the name - `Vandermonde matrix `_. - A low condition number is important for stable algorithms of linear - models. +- B-splines are very flexible and robust if you keep a fixed low degree, + usually 3, and parsimoniously adapt the number of knots. Polynomials + would need a higher degree, which leads to the next point. +- B-splines do not have oscillatory behaviour at the boundaries as have + polynomials (the higher the degree, the worse). This is known as `Runge's + phenomenon `_. +- B-splines provide good options for extrapolation beyond the boundaries, + i.e. beyond the range of fitted values. Have a look at the option + ``extrapolation``. +- B-splines generate a feature matrix with a banded structure. For a single + feature, every row contains only ``degree + 1`` non-zero elements, which + occur consecutively and are even positive. This results in a matrix with + good numerical properties, e.g. a low condition number, in sharp contrast + to a matrix of polynomials, which goes under the name + `Vandermonde matrix `_. + A low condition number is important for stable algorithms of linear + models. The following code snippet shows splines in action:: @@ -1106,7 +1299,9 @@ Interestingly, a :class:`SplineTransformer` of ``degree=0`` is the same as * :ref:`sphx_glr_auto_examples_linear_model_plot_polynomial_interpolation.py` * :ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py` -.. topic:: References: +|details-start| +**References** +|details-split| * Eilers, P., & Marx, B. (1996). :doi:`Flexible Smoothing with B-splines and Penalties <10.1214/ss/1038425655>`. Statist. Sci. 11 (1996), no. 2, 89--121. @@ -1115,6 +1310,8 @@ Interestingly, a :class:`SplineTransformer` of ``degree=0`` is the same as spline function procedures in R <10.1186/s12874-019-0666-3>`. BMC Med Res Methodol 19, 46 (2019). +|details-end| + .. _function_transformer: Custom transformers diff --git a/doc/modules/semi_supervised.rst b/doc/modules/semi_supervised.rst index 47e8bfffdd9a7..f8cae0a9ddcdf 100644 --- a/doc/modules/semi_supervised.rst +++ b/doc/modules/semi_supervised.rst @@ -121,11 +121,11 @@ Label propagation models have two built-in kernel methods. Choice of kernel effects both scalability and performance of the algorithms. The following are available: - * rbf (:math:`\exp(-\gamma |x-y|^2), \gamma > 0`). :math:`\gamma` is - specified by keyword gamma. +* rbf (:math:`\exp(-\gamma |x-y|^2), \gamma > 0`). :math:`\gamma` is + specified by keyword gamma. - * knn (:math:`1[x' \in kNN(x)]`). :math:`k` is specified by keyword - n_neighbors. +* knn (:math:`1[x' \in kNN(x)]`). :math:`k` is specified by keyword + n_neighbors. The RBF kernel will produce a fully connected graph which is represented in memory by a dense matrix. This matrix may be very large and combined with the cost of diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst index c50ed66868c1b..a7981e9d4ec28 100644 --- a/doc/modules/sgd.rst +++ b/doc/modules/sgd.rst @@ -36,16 +36,16 @@ different means. The advantages of Stochastic Gradient Descent are: - + Efficiency. ++ Efficiency. - + Ease of implementation (lots of opportunities for code tuning). ++ Ease of implementation (lots of opportunities for code tuning). 
The disadvantages of Stochastic Gradient Descent include: - + SGD requires a number of hyperparameters such as the regularization - parameter and the number of iterations. ++ SGD requires a number of hyperparameters such as the regularization + parameter and the number of iterations. - + SGD is sensitive to feature scaling. ++ SGD is sensitive to feature scaling. .. warning:: @@ -111,12 +111,12 @@ the coefficients and the input sample, plus the intercept) is given by The concrete loss function can be set via the ``loss`` parameter. :class:`SGDClassifier` supports the following loss functions: - * ``loss="hinge"``: (soft-margin) linear Support Vector Machine, - * ``loss="modified_huber"``: smoothed hinge loss, - * ``loss="log_loss"``: logistic regression, - * and all regression losses below. In this case the target is encoded as -1 - or 1, and the problem is treated as a regression problem. The predicted - class then correspond to the sign of the predicted target. +* ``loss="hinge"``: (soft-margin) linear Support Vector Machine, +* ``loss="modified_huber"``: smoothed hinge loss, +* ``loss="log_loss"``: logistic regression, +* and all regression losses below. In this case the target is encoded as -1 + or 1, and the problem is treated as a regression problem. The predicted + class then correspond to the sign of the predicted target. Please refer to the :ref:`mathematical section below ` for formulas. @@ -136,10 +136,10 @@ Using ``loss="log_loss"`` or ``loss="modified_huber"`` enables the The concrete penalty can be set via the ``penalty`` parameter. SGD supports the following penalties: - * ``penalty="l2"``: L2 norm penalty on ``coef_``. - * ``penalty="l1"``: L1 norm penalty on ``coef_``. - * ``penalty="elasticnet"``: Convex combination of L2 and L1; - ``(1 - l1_ratio) * L2 + l1_ratio * L1``. +* ``penalty="l2"``: L2 norm penalty on ``coef_``. +* ``penalty="l1"``: L1 norm penalty on ``coef_``. +* ``penalty="elasticnet"``: Convex combination of L2 and L1; + ``(1 - l1_ratio) * L2 + l1_ratio * L1``. The default setting is ``penalty="l2"``. The L1 penalty leads to sparse solutions, driving most coefficients to zero. The Elastic Net [#5]_ solves @@ -211,9 +211,9 @@ samples (> 10.000), for other problems we recommend :class:`Ridge`, The concrete loss function can be set via the ``loss`` parameter. :class:`SGDRegressor` supports the following loss functions: - * ``loss="squared_error"``: Ordinary least squares, - * ``loss="huber"``: Huber loss for robust regression, - * ``loss="epsilon_insensitive"``: linear Support Vector Regression. +* ``loss="squared_error"``: Ordinary least squares, +* ``loss="huber"``: Huber loss for robust regression, +* ``loss="epsilon_insensitive"``: linear Support Vector Regression. Please refer to the :ref:`mathematical section below ` for formulas. @@ -249,6 +249,10 @@ quadratic in the number of samples. with a large number of training samples (> 10,000) for which the SGD variant can be several orders of magnitude faster. +|details-start| +**Mathematical details** +|details-split| + Its implementation is based on the implementation of the stochastic gradient descent. Indeed, the original optimization problem of the One-Class SVM is given by @@ -282,6 +286,8 @@ This is similar to the optimization problems studied in section being the L2 norm. We just need to add the term :math:`b\nu` in the optimization loop. +|details-end| + As :class:`SGDClassifier` and :class:`SGDRegressor`, :class:`SGDOneClassSVM` supports averaged SGD. 
Averaging can be enabled by setting ``average=True``. @@ -321,14 +327,14 @@ Stopping criterion The classes :class:`SGDClassifier` and :class:`SGDRegressor` provide two criteria to stop the algorithm when a given level of convergence is reached: - * With ``early_stopping=True``, the input data is split into a training set - and a validation set. The model is then fitted on the training set, and the - stopping criterion is based on the prediction score (using the `score` - method) computed on the validation set. The size of the validation set - can be changed with the parameter ``validation_fraction``. - * With ``early_stopping=False``, the model is fitted on the entire input data - and the stopping criterion is based on the objective function computed on - the training data. +* With ``early_stopping=True``, the input data is split into a training set + and a validation set. The model is then fitted on the training set, and the + stopping criterion is based on the prediction score (using the `score` + method) computed on the validation set. The size of the validation set + can be changed with the parameter ``validation_fraction``. +* With ``early_stopping=False``, the model is fitted on the entire input data + and the stopping criterion is based on the objective function computed on + the training data. In both cases, the criterion is evaluated once by epoch, and the algorithm stops when the criterion does not improve ``n_iter_no_change`` times in a row. The @@ -339,45 +345,45 @@ stops in any case after a maximum number of iteration ``max_iter``. Tips on Practical Use ===================== - * Stochastic Gradient Descent is sensitive to feature scaling, so it - is highly recommended to scale your data. For example, scale each - attribute on the input vector X to [0,1] or [-1,+1], or standardize - it to have mean 0 and variance 1. Note that the *same* scaling - must be applied to the test vector to obtain meaningful - results. This can be easily done using :class:`StandardScaler`:: - - from sklearn.preprocessing import StandardScaler - scaler = StandardScaler() - scaler.fit(X_train) # Don't cheat - fit only on training data - X_train = scaler.transform(X_train) - X_test = scaler.transform(X_test) # apply same transformation to test data - - # Or better yet: use a pipeline! - from sklearn.pipeline import make_pipeline - est = make_pipeline(StandardScaler(), SGDClassifier()) - est.fit(X_train) - est.predict(X_test) - - If your attributes have an intrinsic scale (e.g. word frequencies or - indicator features) scaling is not needed. - - * Finding a reasonable regularization term :math:`\alpha` is - best done using automatic hyper-parameter search, e.g. - :class:`~sklearn.model_selection.GridSearchCV` or - :class:`~sklearn.model_selection.RandomizedSearchCV`, usually in the - range ``10.0**-np.arange(1,7)``. - - * Empirically, we found that SGD converges after observing - approximately 10^6 training samples. Thus, a reasonable first guess - for the number of iterations is ``max_iter = np.ceil(10**6 / n)``, - where ``n`` is the size of the training set. - - * If you apply SGD to features extracted using PCA we found that - it is often wise to scale the feature values by some constant `c` - such that the average L2 norm of the training data equals one. - - * We found that Averaged SGD works best with a larger number of features - and a higher eta0 +* Stochastic Gradient Descent is sensitive to feature scaling, so it + is highly recommended to scale your data. 
For example, scale each + attribute on the input vector X to [0,1] or [-1,+1], or standardize + it to have mean 0 and variance 1. Note that the *same* scaling must be + applied to the test vector to obtain meaningful results. This can be easily + done using :class:`~sklearn.preprocessing.StandardScaler`:: + + from sklearn.preprocessing import StandardScaler + scaler = StandardScaler() + scaler.fit(X_train) # Don't cheat - fit only on training data + X_train = scaler.transform(X_train) + X_test = scaler.transform(X_test) # apply same transformation to test data + + # Or better yet: use a pipeline! + from sklearn.pipeline import make_pipeline + est = make_pipeline(StandardScaler(), SGDClassifier()) + est.fit(X_train) + est.predict(X_test) + + If your attributes have an intrinsic scale (e.g. word frequencies or + indicator features) scaling is not needed. + +* Finding a reasonable regularization term :math:`\alpha` is + best done using automatic hyper-parameter search, e.g. + :class:`~sklearn.model_selection.GridSearchCV` or + :class:`~sklearn.model_selection.RandomizedSearchCV`, usually in the + range ``10.0**-np.arange(1,7)``. + +* Empirically, we found that SGD converges after observing + approximately 10^6 training samples. Thus, a reasonable first guess + for the number of iterations is ``max_iter = np.ceil(10**6 / n)``, + where ``n`` is the size of the training set. + +* If you apply SGD to features extracted using PCA we found that + it is often wise to scale the feature values by some constant `c` + such that the average L2 norm of the training data equals one. + +* We found that Averaged SGD works best with a larger number of features + and a higher eta0. .. topic:: References: @@ -410,6 +416,10 @@ where :math:`L` is a loss function that measures model (mis)fit and complexity; :math:`\alpha > 0` is a non-negative hyperparameter that controls the regularization strength. +|details-start| +**Loss functions details** +|details-split| + Different choices for :math:`L` entail different classifiers or regressors: - Hinge (soft-margin): equivalent to Support Vector Classification. @@ -418,7 +428,7 @@ Different choices for :math:`L` entail different classifiers or regressors: :math:`L(y_i, f(x_i)) = \max(0, - y_i f(x_i))`. - Modified Huber: :math:`L(y_i, f(x_i)) = \max(0, 1 - y_i f(x_i))^2` if :math:`y_i f(x_i) > - 1`, and :math:`L(y_i, f(x_i)) = -4 y_i f(x_i)` otherwise. + -1`, and :math:`L(y_i, f(x_i)) = -4 y_i f(x_i)` otherwise. - Log Loss: equivalent to Logistic Regression. :math:`L(y_i, f(x_i)) = \log(1 + \exp (-y_i f(x_i)))`. - Squared Error: Linear regression (Ridge or Lasso depending on @@ -431,6 +441,8 @@ Different choices for :math:`L` entail different classifiers or regressors: - Epsilon-Insensitive: (soft-margin) equivalent to Support Vector Regression. :math:`L(y_i, f(x_i)) = \max(0, |y_i - f(x_i)| - \varepsilon)`. +|details-end| + All of the above loss functions can be regarded as an upper bound on the misclassification error (Zero-one loss) as shown in the Figure below. @@ -442,12 +454,12 @@ misclassification error (Zero-one loss) as shown in the Figure below. Popular choices for the regularization term :math:`R` (the `penalty` parameter) include: - - L2 norm: :math:`R(w) := \frac{1}{2} \sum_{j=1}^{m} w_j^2 = ||w||_2^2`, - - L1 norm: :math:`R(w) := \sum_{j=1}^{m} |w_j|`, which leads to sparse - solutions. 
- - Elastic Net: :math:`R(w) := \frac{\rho}{2} \sum_{j=1}^{n} w_j^2 + - (1-\rho) \sum_{j=1}^{m} |w_j|`, a convex combination of L2 and L1, where - :math:`\rho` is given by ``1 - l1_ratio``. +- L2 norm: :math:`R(w) := \frac{1}{2} \sum_{j=1}^{m} w_j^2 = ||w||_2^2`, +- L1 norm: :math:`R(w) := \sum_{j=1}^{m} |w_j|`, which leads to sparse + solutions. +- Elastic Net: :math:`R(w) := \frac{\rho}{2} \sum_{j=1}^{n} w_j^2 + + (1-\rho) \sum_{j=1}^{m} |w_j|`, a convex combination of L2 and L1, where + :math:`\rho` is given by ``1 - l1_ratio``. The Figure below shows the contours of the different regularization terms in a 2-dimensional parameter space (:math:`m=2`) when :math:`R(w) = 1`. @@ -491,7 +503,7 @@ where :math:`t` is the time step (there are a total of `n_samples * n_iter` time steps), :math:`t_0` is determined based on a heuristic proposed by Léon Bottou such that the expected initial updates are comparable with the expected size of the weights (this assuming that the norm of the training samples is -approx. 1). The exact definition can be found in ``_init_t`` in :class:`BaseSGD`. +approx. 1). The exact definition can be found in ``_init_t`` in `BaseSGD`. For regression the default learning rate schedule is inverse scaling diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index b6932c45e40f3..e3bc1395819e9 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -16,27 +16,27 @@ methods used for :ref:`classification `, The advantages of support vector machines are: - - Effective in high dimensional spaces. +- Effective in high dimensional spaces. - - Still effective in cases where number of dimensions is greater - than the number of samples. +- Still effective in cases where number of dimensions is greater + than the number of samples. - - Uses a subset of training points in the decision function (called - support vectors), so it is also memory efficient. +- Uses a subset of training points in the decision function (called + support vectors), so it is also memory efficient. - - Versatile: different :ref:`svm_kernels` can be - specified for the decision function. Common kernels are - provided, but it is also possible to specify custom kernels. +- Versatile: different :ref:`svm_kernels` can be + specified for the decision function. Common kernels are + provided, but it is also possible to specify custom kernels. The disadvantages of support vector machines include: - - If the number of features is much greater than the number of - samples, avoid over-fitting in choosing :ref:`svm_kernels` and regularization - term is crucial. +- If the number of features is much greater than the number of + samples, avoid over-fitting in choosing :ref:`svm_kernels` and regularization + term is crucial. - - SVMs do not directly provide probability estimates, these are - calculated using an expensive five-fold cross-validation - (see :ref:`Scores and probabilities `, below). +- SVMs do not directly provide probability estimates, these are + calculated using an expensive five-fold cross-validation + (see :ref:`Scores and probabilities `, below). The support vector machines in scikit-learn support both dense (``numpy.ndarray`` and convertible to that by ``numpy.asarray``) and @@ -60,14 +60,19 @@ capable of performing binary and multi-class classification on a dataset. :align: center -:class:`SVC` and :class:`NuSVC` are similar methods, but accept -slightly different sets of parameters and have different mathematical -formulations (see section :ref:`svm_mathematical_formulation`). 
On the
-other hand, :class:`LinearSVC` is another (faster) implementation of Support
-Vector Classification for the case of a linear kernel. Note that
-:class:`LinearSVC` does not accept parameter ``kernel``, as this is
-assumed to be linear. It also lacks some of the attributes of
-:class:`SVC` and :class:`NuSVC`, like ``support_``.
+:class:`SVC` and :class:`NuSVC` are similar methods, but accept slightly
+different sets of parameters and have different mathematical formulations (see
+section :ref:`svm_mathematical_formulation`). On the other hand,
+:class:`LinearSVC` is another (faster) implementation of Support Vector
+Classification for the case of a linear kernel. It also
+lacks some of the attributes of :class:`SVC` and :class:`NuSVC`, like
+`support_`. :class:`LinearSVC` uses `squared_hinge` loss and, due to its
+implementation in `liblinear`, it also regularizes the intercept, if considered.
+This effect can however be reduced by carefully fine-tuning its
+`intercept_scaling` parameter, which allows the intercept term to have a
+different regularization behavior compared to the other features. The
+classification results and score can therefore differ from the other two
+classifiers.
As other classifiers, :class:`SVC`, :class:`NuSVC` and :class:`LinearSVC` take as input two arrays: an array `X` of shape
@@ -129,7 +134,7 @@ function of shape ``(n_samples, n_classes)``.
>>> clf.fit(X, Y) SVC(decision_function_shape='ovo')
>>> dec = clf.decision_function([[1]])
- >>> dec.shape[1] # 4 classes: 4*3/2 = 6
+ >>> dec.shape[1] # 4 classes: 4*3/2 = 6 one-vs-one classifiers
6
>>> clf.decision_function_shape = "ovr"
>>> dec = clf.decision_function([[1]])
@@ -149,6 +154,10 @@ multi-class strategy, thus training `n_classes` models.
See :ref:`svm_mathematical_formulation` for a complete description of the decision function.
+|details-start|
+**Details on multi-class strategies**
+|details-split|
+
Note that the :class:`LinearSVC` also implements an alternative multi-class strategy, the so-called multi-class SVM formulated by Crammer and Singer [#8]_, by using the option ``multi_class='crammer_singer'``. In practice,
@@ -199,6 +208,8 @@ Then ``dual_coef_`` looks like this:
|for SVs of class 0 |for SVs of class 1 |for SVs of class 2 |
+--------------------------------------------------------------------------+-------------------------------------------------+-------------------------------------------------+
+|details-end|
+
.. topic:: Examples:
* :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py`,
@@ -308,10 +319,15 @@ target.
There are three different implementations of Support Vector Regression: :class:`SVR`, :class:`NuSVR` and :class:`LinearSVR`. :class:`LinearSVR`
-provides a faster implementation than :class:`SVR` but only considers
-the linear kernel, while :class:`NuSVR` implements a slightly different
-formulation than :class:`SVR` and :class:`LinearSVR`. See
-:ref:`svm_implementation_details` for further details.
+provides a faster implementation than :class:`SVR` but only considers the
+linear kernel, while :class:`NuSVR` implements a slightly different formulation
+than :class:`SVR` and :class:`LinearSVR`. Due to its implementation in
+`liblinear`, :class:`LinearSVR` also regularizes the intercept, if considered.
+This effect can however be reduced by carefully fine-tuning its
+`intercept_scaling` parameter, which allows the intercept term to have a
+different regularization behavior compared to the other features. The
+prediction results and score can therefore differ from the other two
+regressors.
See :ref:`svm_implementation_details` for further details. As with classification classes, the fit method will take as argument vectors X, y, only that in this case y is expected to have @@ -365,95 +381,95 @@ Tips on Practical Use ===================== - * **Avoiding data copy**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and - :class:`NuSVR`, if the data passed to certain methods is not C-ordered - contiguous and double precision, it will be copied before calling the - underlying C implementation. You can check whether a given numpy array is - C-contiguous by inspecting its ``flags`` attribute. - - For :class:`LinearSVC` (and :class:`LogisticRegression - `) any input passed as a numpy - array will be copied and converted to the `liblinear`_ internal sparse data - representation (double precision floats and int32 indices of non-zero - components). If you want to fit a large-scale linear classifier without - copying a dense numpy C-contiguous double precision array as input, we - suggest to use the :class:`SGDClassifier - ` class instead. The objective - function can be configured to be almost the same as the :class:`LinearSVC` - model. - - * **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and - :class:`NuSVR`, the size of the kernel cache has a strong impact on run - times for larger problems. If you have enough RAM available, it is - recommended to set ``cache_size`` to a higher value than the default of - 200(MB), such as 500(MB) or 1000(MB). - - - * **Setting C**: ``C`` is ``1`` by default and it's a reasonable default - choice. If you have a lot of noisy observations you should decrease it: - decreasing C corresponds to more regularization. - - :class:`LinearSVC` and :class:`LinearSVR` are less sensitive to ``C`` when - it becomes large, and prediction results stop improving after a certain - threshold. Meanwhile, larger ``C`` values will take more time to train, - sometimes up to 10 times longer, as shown in [#3]_. - - * Support Vector Machine algorithms are not scale invariant, so **it - is highly recommended to scale your data**. For example, scale each - attribute on the input vector X to [0,1] or [-1,+1], or standardize it - to have mean 0 and variance 1. Note that the *same* scaling must be - applied to the test vector to obtain meaningful results. This can be done - easily by using a :class:`~sklearn.pipeline.Pipeline`:: - - >>> from sklearn.pipeline import make_pipeline - >>> from sklearn.preprocessing import StandardScaler - >>> from sklearn.svm import SVC - - >>> clf = make_pipeline(StandardScaler(), SVC()) - - See section :ref:`preprocessing` for more details on scaling and - normalization. - - .. _shrinking_svm: - - * Regarding the `shrinking` parameter, quoting [#4]_: *We found that if the - number of iterations is large, then shrinking can shorten the training - time. However, if we loosely solve the optimization problem (e.g., by - using a large stopping tolerance), the code without using shrinking may - be much faster* - - * Parameter ``nu`` in :class:`NuSVC`/:class:`OneClassSVM`/:class:`NuSVR` - approximates the fraction of training errors and support vectors. - - * In :class:`SVC`, if the data is unbalanced (e.g. many - positive and few negative), set ``class_weight='balanced'`` and/or try - different penalty parameters ``C``. 
- - * **Randomness of the underlying implementations**: The underlying - implementations of :class:`SVC` and :class:`NuSVC` use a random number - generator only to shuffle the data for probability estimation (when - ``probability`` is set to ``True``). This randomness can be controlled - with the ``random_state`` parameter. If ``probability`` is set to ``False`` - these estimators are not random and ``random_state`` has no effect on the - results. The underlying :class:`OneClassSVM` implementation is similar to - the ones of :class:`SVC` and :class:`NuSVC`. As no probability estimation - is provided for :class:`OneClassSVM`, it is not random. - - The underlying :class:`LinearSVC` implementation uses a random number - generator to select features when fitting the model with a dual coordinate - descent (i.e when ``dual`` is set to ``True``). It is thus not uncommon - to have slightly different results for the same input data. If that - happens, try with a smaller `tol` parameter. This randomness can also be - controlled with the ``random_state`` parameter. When ``dual`` is - set to ``False`` the underlying implementation of :class:`LinearSVC` is - not random and ``random_state`` has no effect on the results. - - * Using L1 penalization as provided by ``LinearSVC(penalty='l1', - dual=False)`` yields a sparse solution, i.e. only a subset of feature - weights is different from zero and contribute to the decision function. - Increasing ``C`` yields a more complex model (more features are selected). - The ``C`` value that yields a "null" model (all weights equal to zero) can - be calculated using :func:`l1_min_c`. +* **Avoiding data copy**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and + :class:`NuSVR`, if the data passed to certain methods is not C-ordered + contiguous and double precision, it will be copied before calling the + underlying C implementation. You can check whether a given numpy array is + C-contiguous by inspecting its ``flags`` attribute. + + For :class:`LinearSVC` (and :class:`LogisticRegression + `) any input passed as a numpy + array will be copied and converted to the `liblinear`_ internal sparse data + representation (double precision floats and int32 indices of non-zero + components). If you want to fit a large-scale linear classifier without + copying a dense numpy C-contiguous double precision array as input, we + suggest to use the :class:`SGDClassifier + ` class instead. The objective + function can be configured to be almost the same as the :class:`LinearSVC` + model. + +* **Kernel cache size**: For :class:`SVC`, :class:`SVR`, :class:`NuSVC` and + :class:`NuSVR`, the size of the kernel cache has a strong impact on run + times for larger problems. If you have enough RAM available, it is + recommended to set ``cache_size`` to a higher value than the default of + 200(MB), such as 500(MB) or 1000(MB). + + +* **Setting C**: ``C`` is ``1`` by default and it's a reasonable default + choice. If you have a lot of noisy observations you should decrease it: + decreasing C corresponds to more regularization. + + :class:`LinearSVC` and :class:`LinearSVR` are less sensitive to ``C`` when + it becomes large, and prediction results stop improving after a certain + threshold. Meanwhile, larger ``C`` values will take more time to train, + sometimes up to 10 times longer, as shown in [#3]_. + +* Support Vector Machine algorithms are not scale invariant, so **it + is highly recommended to scale your data**. 
For example, scale each + attribute on the input vector X to [0,1] or [-1,+1], or standardize it + to have mean 0 and variance 1. Note that the *same* scaling must be + applied to the test vector to obtain meaningful results. This can be done + easily by using a :class:`~sklearn.pipeline.Pipeline`:: + + >>> from sklearn.pipeline import make_pipeline + >>> from sklearn.preprocessing import StandardScaler + >>> from sklearn.svm import SVC + + >>> clf = make_pipeline(StandardScaler(), SVC()) + + See section :ref:`preprocessing` for more details on scaling and + normalization. + +.. _shrinking_svm: + +* Regarding the `shrinking` parameter, quoting [#4]_: *We found that if the + number of iterations is large, then shrinking can shorten the training + time. However, if we loosely solve the optimization problem (e.g., by + using a large stopping tolerance), the code without using shrinking may + be much faster* + +* Parameter ``nu`` in :class:`NuSVC`/:class:`OneClassSVM`/:class:`NuSVR` + approximates the fraction of training errors and support vectors. + +* In :class:`SVC`, if the data is unbalanced (e.g. many + positive and few negative), set ``class_weight='balanced'`` and/or try + different penalty parameters ``C``. + +* **Randomness of the underlying implementations**: The underlying + implementations of :class:`SVC` and :class:`NuSVC` use a random number + generator only to shuffle the data for probability estimation (when + ``probability`` is set to ``True``). This randomness can be controlled + with the ``random_state`` parameter. If ``probability`` is set to ``False`` + these estimators are not random and ``random_state`` has no effect on the + results. The underlying :class:`OneClassSVM` implementation is similar to + the ones of :class:`SVC` and :class:`NuSVC`. As no probability estimation + is provided for :class:`OneClassSVM`, it is not random. + + The underlying :class:`LinearSVC` implementation uses a random number + generator to select features when fitting the model with a dual coordinate + descent (i.e. when ``dual`` is set to ``True``). It is thus not uncommon + to have slightly different results for the same input data. If that + happens, try with a smaller `tol` parameter. This randomness can also be + controlled with the ``random_state`` parameter. When ``dual`` is + set to ``False`` the underlying implementation of :class:`LinearSVC` is + not random and ``random_state`` has no effect on the results. + +* Using L1 penalization as provided by ``LinearSVC(penalty='l1', + dual=False)`` yields a sparse solution, i.e. only a subset of feature + weights is different from zero and contribute to the decision function. + Increasing ``C`` yields a more complex model (more features are selected). + The ``C`` value that yields a "null" model (all weights equal to zero) can + be calculated using :func:`l1_min_c`. .. _svm_kernels: @@ -463,16 +479,16 @@ Kernel functions The *kernel function* can be any of the following: - * linear: :math:`\langle x, x'\rangle`. +* linear: :math:`\langle x, x'\rangle`. - * polynomial: :math:`(\gamma \langle x, x'\rangle + r)^d`, where - :math:`d` is specified by parameter ``degree``, :math:`r` by ``coef0``. +* polynomial: :math:`(\gamma \langle x, x'\rangle + r)^d`, where + :math:`d` is specified by parameter ``degree``, :math:`r` by ``coef0``. - * rbf: :math:`\exp(-\gamma \|x-x'\|^2)`, where :math:`\gamma` is - specified by parameter ``gamma``, must be greater than 0. 
+* rbf: :math:`\exp(-\gamma \|x-x'\|^2)`, where :math:`\gamma` is + specified by parameter ``gamma``, must be greater than 0. - * sigmoid :math:`\tanh(\gamma \langle x,x'\rangle + r)`, - where :math:`r` is specified by ``coef0``. +* sigmoid :math:`\tanh(\gamma \langle x,x'\rangle + r)`, + where :math:`r` is specified by ``coef0``. Different kernels are specified by the `kernel` parameter:: @@ -504,7 +520,7 @@ is advised to use :class:`~sklearn.model_selection.GridSearchCV` with * :ref:`sphx_glr_auto_examples_svm_plot_rbf_parameters.py` * :ref:`sphx_glr_auto_examples_svm_plot_svm_nonlinear.py` - + * :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py` Custom Kernels -------------- @@ -515,16 +531,17 @@ python function or by precomputing the Gram matrix. Classifiers with custom kernels behave the same way as any other classifiers, except that: - * Field ``support_vectors_`` is now empty, only indices of support - vectors are stored in ``support_`` +* Field ``support_vectors_`` is now empty, only indices of support + vectors are stored in ``support_`` - * A reference (and not a copy) of the first argument in the ``fit()`` - method is stored for future reference. If that array changes between the - use of ``fit()`` and ``predict()`` you will have unexpected results. +* A reference (and not a copy) of the first argument in the ``fit()`` + method is stored for future reference. If that array changes between the + use of ``fit()`` and ``predict()`` you will have unexpected results. -Using Python functions as kernels -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Using Python functions as kernels** +|details-split| You can use your own defined kernels by passing a function to the ``kernel`` parameter. @@ -543,12 +560,12 @@ instance that will use that kernel:: ... >>> clf = svm.SVC(kernel=my_kernel) -.. topic:: Examples: +|details-end| - * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. -Using the Gram matrix -~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Using the Gram matrix** +|details-split| You can pass pre-computed kernels by using the ``kernel='precomputed'`` option. You should then pass Gram matrix instead of X to the `fit` and @@ -571,6 +588,11 @@ test vectors must be provided: >>> clf.predict(gram_test) array([0, 1, 0]) +|details-end| + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. .. _svm_mathematical_formulation: @@ -667,8 +689,9 @@ term :math:`b` estimator used is :class:`~sklearn.linear_model.Ridge` regression, the relation between them is given as :math:`C = \frac{1}{alpha}`. -LinearSVC ---------- +|details-start| +**LinearSVC** +|details-split| The primal problem can be equivalently formulated as @@ -683,10 +706,13 @@ does not involve inner products between samples, so the famous kernel trick cannot be applied. This is why only the linear kernel is supported by :class:`LinearSVC` (:math:`\phi` is the identity function). +|details-end| + .. _nu_svc: -NuSVC ------ +|details-start| +**NuSVC** +|details-split| The :math:`\nu`-SVC formulation [#7]_ is a reparameterization of the :math:`C`-SVC and therefore mathematically equivalent. @@ -699,6 +725,7 @@ to a sample that lies on the wrong side of its margin boundary: it is either misclassified, or it is correctly classified but does not lie beyond the margin. 
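+
+As a minimal sketch (the synthetic dataset and the value ``nu=0.5`` below are
+only illustrative), one can fit :class:`NuSVC` and inspect the fraction of
+training samples selected as support vectors, which is lower bounded by
+``nu``::
+
+    from sklearn.datasets import make_classification
+    from sklearn.svm import NuSVC
+
+    X, y = make_classification(n_samples=100, random_state=0)
+    clf = NuSVC(nu=0.5).fit(X, y)
+    # at least a fraction nu (50% here) of the training samples end up as
+    # support vectors, while at most a fraction nu can be margin errors
+    sv_fraction = len(clf.support_) / X.shape[0]
+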
+|details-end| SVR --- @@ -747,8 +774,9 @@ which holds the difference :math:`\alpha_i - \alpha_i^*`, ``support_vectors_`` w holds the support vectors, and ``intercept_`` which holds the independent term :math:`b` -LinearSVR ---------- +|details-start| +**LinearSVR** +|details-split| The primal problem can be equivalently formulated as @@ -760,6 +788,8 @@ where we make use of the epsilon-insensitive loss, i.e. errors of less than :math:`\varepsilon` are ignored. This is the form that is directly optimized by :class:`LinearSVR`. +|details-end| + .. _svm_implementation_details: Implementation details diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index 28bcd07ab978d..b54b913573a34 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -23,68 +23,68 @@ the tree, the more complex the decision rules and the fitter the model. Some advantages of decision trees are: - - Simple to understand and to interpret. Trees can be visualized. +- Simple to understand and to interpret. Trees can be visualized. - - Requires little data preparation. Other techniques often require data - normalization, dummy variables need to be created and blank values to - be removed. Note however that this module does not support missing - values. +- Requires little data preparation. Other techniques often require data + normalization, dummy variables need to be created and blank values to + be removed. Some tree and algorithm combinations support + :ref:`missing values `. - - The cost of using the tree (i.e., predicting data) is logarithmic in the - number of data points used to train the tree. +- The cost of using the tree (i.e., predicting data) is logarithmic in the + number of data points used to train the tree. - - Able to handle both numerical and categorical data. However, the scikit-learn - implementation does not support categorical variables for now. Other - techniques are usually specialized in analyzing datasets that have only one type - of variable. See :ref:`algorithms ` for more - information. +- Able to handle both numerical and categorical data. However, the scikit-learn + implementation does not support categorical variables for now. Other + techniques are usually specialized in analyzing datasets that have only one type + of variable. See :ref:`algorithms ` for more + information. - - Able to handle multi-output problems. +- Able to handle multi-output problems. - - Uses a white box model. If a given situation is observable in a model, - the explanation for the condition is easily explained by boolean logic. - By contrast, in a black box model (e.g., in an artificial neural - network), results may be more difficult to interpret. +- Uses a white box model. If a given situation is observable in a model, + the explanation for the condition is easily explained by boolean logic. + By contrast, in a black box model (e.g., in an artificial neural + network), results may be more difficult to interpret. - - Possible to validate a model using statistical tests. That makes it - possible to account for the reliability of the model. +- Possible to validate a model using statistical tests. That makes it + possible to account for the reliability of the model. - - Performs well even if its assumptions are somewhat violated by - the true model from which the data were generated. +- Performs well even if its assumptions are somewhat violated by + the true model from which the data were generated. 
The disadvantages of decision trees include: - - Decision-tree learners can create over-complex trees that do not - generalize the data well. This is called overfitting. Mechanisms - such as pruning, setting the minimum number of samples required - at a leaf node or setting the maximum depth of the tree are - necessary to avoid this problem. +- Decision-tree learners can create over-complex trees that do not + generalize the data well. This is called overfitting. Mechanisms + such as pruning, setting the minimum number of samples required + at a leaf node or setting the maximum depth of the tree are + necessary to avoid this problem. - - Decision trees can be unstable because small variations in the - data might result in a completely different tree being generated. - This problem is mitigated by using decision trees within an - ensemble. +- Decision trees can be unstable because small variations in the + data might result in a completely different tree being generated. + This problem is mitigated by using decision trees within an + ensemble. - - Predictions of decision trees are neither smooth nor continuous, but - piecewise constant approximations as seen in the above figure. Therefore, - they are not good at extrapolation. +- Predictions of decision trees are neither smooth nor continuous, but + piecewise constant approximations as seen in the above figure. Therefore, + they are not good at extrapolation. - - The problem of learning an optimal decision tree is known to be - NP-complete under several aspects of optimality and even for simple - concepts. Consequently, practical decision-tree learning algorithms - are based on heuristic algorithms such as the greedy algorithm where - locally optimal decisions are made at each node. Such algorithms - cannot guarantee to return the globally optimal decision tree. This - can be mitigated by training multiple trees in an ensemble learner, - where the features and samples are randomly sampled with replacement. +- The problem of learning an optimal decision tree is known to be + NP-complete under several aspects of optimality and even for simple + concepts. Consequently, practical decision-tree learning algorithms + are based on heuristic algorithms such as the greedy algorithm where + locally optimal decisions are made at each node. Such algorithms + cannot guarantee to return the globally optimal decision tree. This + can be mitigated by training multiple trees in an ensemble learner, + where the features and samples are randomly sampled with replacement. - - There are concepts that are hard to learn because decision trees - do not express them easily, such as XOR, parity or multiplexer problems. +- There are concepts that are hard to learn because decision trees + do not express them easily, such as XOR, parity or multiplexer problems. - - Decision tree learners create biased trees if some classes dominate. - It is therefore recommended to balance the dataset prior to fitting - with the decision tree. +- Decision tree learners create biased trees if some classes dominate. + It is therefore recommended to balance the dataset prior to fitting + with the decision tree. .. _tree_classification: @@ -146,6 +146,10 @@ Once trained, you can plot the tree with the :func:`plot_tree` function:: :scale: 75 :align: center +|details-start| +**Alternative ways to export trees** +|details-split| + We can also export the tree in `Graphviz `_ format using the :func:`export_graphviz` exporter. 
If you use the `conda `_ package manager, the graphviz binaries @@ -212,6 +216,8 @@ of external libraries and is more compact: | | |--- class: 2 +|details-end| + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` @@ -267,20 +273,19 @@ generalization accuracy of the resulting estimator may often be increased. With regard to decision trees, this strategy can readily be used to support multi-output problems. This requires the following changes: - - Store n output values in leaves, instead of 1; - - Use splitting criteria that compute the average reduction across all - n outputs. +- Store n output values in leaves, instead of 1; +- Use splitting criteria that compute the average reduction across all + n outputs. This module offers support for multi-output problems by implementing this strategy in both :class:`DecisionTreeClassifier` and :class:`DecisionTreeRegressor`. If a decision tree is fit on an output array Y of shape ``(n_samples, n_outputs)`` then the resulting estimator will: - * Output n_output values upon ``predict``; - - * Output a list of n_output arrays of class probabilities upon - ``predict_proba``. +* Output n_output values upon ``predict``; +* Output a list of n_output arrays of class probabilities upon + ``predict_proba``. The use of multi-output trees for regression is demonstrated in :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py`. In this example, the input @@ -303,15 +308,19 @@ the lower half of those faces. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` + * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py` + * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` + +|details-start| +**References** +|details-split| -.. topic:: References: +* M. Dumont et al, `Fast multi-class image annotation with random subwindows + and multiple output randomized trees + `_, International Conference on + Computer Vision Theory and Applications 2009 - * M. Dumont et al, `Fast multi-class image annotation with random subwindows - and multiple output randomized trees - `_, International Conference on - Computer Vision Theory and Applications 2009 +|details-end| .. _tree_complexity: @@ -334,65 +343,65 @@ total cost over the entire trees (by summing the cost at each node) of Tips on practical use ===================== - * Decision trees tend to overfit on data with a large number of features. - Getting the right ratio of samples to number of features is important, since - a tree with few samples in high dimensional space is very likely to overfit. - - * Consider performing dimensionality reduction (:ref:`PCA `, - :ref:`ICA `, or :ref:`feature_selection`) beforehand to - give your tree a better chance of finding features that are discriminative. - - * :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` will help - in gaining more insights about how the decision tree makes predictions, which is - important for understanding the important features in the data. - - * Visualize your tree as you are training by using the ``export`` - function. Use ``max_depth=3`` as an initial tree depth to get a feel for - how the tree is fitting to your data, and then increase the depth. - - * Remember that the number of samples required to populate the tree doubles - for each additional level the tree grows to. 
Use ``max_depth`` to control - the size of the tree to prevent overfitting. - - * Use ``min_samples_split`` or ``min_samples_leaf`` to ensure that multiple - samples inform every decision in the tree, by controlling which splits will - be considered. A very small number will usually mean the tree will overfit, - whereas a large number will prevent the tree from learning the data. Try - ``min_samples_leaf=5`` as an initial value. If the sample size varies - greatly, a float number can be used as percentage in these two parameters. - While ``min_samples_split`` can create arbitrarily small leaves, - ``min_samples_leaf`` guarantees that each leaf has a minimum size, avoiding - low-variance, over-fit leaf nodes in regression problems. For - classification with few classes, ``min_samples_leaf=1`` is often the best - choice. - - Note that ``min_samples_split`` considers samples directly and independent of - ``sample_weight``, if provided (e.g. a node with m weighted samples is still - treated as having exactly m samples). Consider ``min_weight_fraction_leaf`` or - ``min_impurity_decrease`` if accounting for sample weights is required at splits. - - * Balance your dataset before training to prevent the tree from being biased - toward the classes that are dominant. Class balancing can be done by - sampling an equal number of samples from each class, or preferably by - normalizing the sum of the sample weights (``sample_weight``) for each - class to the same value. Also note that weight-based pre-pruning criteria, - such as ``min_weight_fraction_leaf``, will then be less biased toward - dominant classes than criteria that are not aware of the sample weights, - like ``min_samples_leaf``. - - * If the samples are weighted, it will be easier to optimize the tree - structure using weight-based pre-pruning criterion such as - ``min_weight_fraction_leaf``, which ensure that leaf nodes contain at least - a fraction of the overall sum of the sample weights. - - * All decision trees use ``np.float32`` arrays internally. - If training data is not in this format, a copy of the dataset will be made. - - * If the input matrix X is very sparse, it is recommended to convert to sparse - ``csc_matrix`` before calling fit and sparse ``csr_matrix`` before calling - predict. Training time can be orders of magnitude faster for a sparse - matrix input compared to a dense matrix when features have zero values in - most of the samples. +* Decision trees tend to overfit on data with a large number of features. + Getting the right ratio of samples to number of features is important, since + a tree with few samples in high dimensional space is very likely to overfit. + +* Consider performing dimensionality reduction (:ref:`PCA `, + :ref:`ICA `, or :ref:`feature_selection`) beforehand to + give your tree a better chance of finding features that are discriminative. + +* :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` will help + in gaining more insights about how the decision tree makes predictions, which is + important for understanding the important features in the data. + +* Visualize your tree as you are training by using the ``export`` + function. Use ``max_depth=3`` as an initial tree depth to get a feel for + how the tree is fitting to your data, and then increase the depth. + +* Remember that the number of samples required to populate the tree doubles + for each additional level the tree grows to. Use ``max_depth`` to control + the size of the tree to prevent overfitting. 
+ +* Use ``min_samples_split`` or ``min_samples_leaf`` to ensure that multiple + samples inform every decision in the tree, by controlling which splits will + be considered. A very small number will usually mean the tree will overfit, + whereas a large number will prevent the tree from learning the data. Try + ``min_samples_leaf=5`` as an initial value. If the sample size varies + greatly, a float number can be used as percentage in these two parameters. + While ``min_samples_split`` can create arbitrarily small leaves, + ``min_samples_leaf`` guarantees that each leaf has a minimum size, avoiding + low-variance, over-fit leaf nodes in regression problems. For + classification with few classes, ``min_samples_leaf=1`` is often the best + choice. + + Note that ``min_samples_split`` considers samples directly and independent of + ``sample_weight``, if provided (e.g. a node with m weighted samples is still + treated as having exactly m samples). Consider ``min_weight_fraction_leaf`` or + ``min_impurity_decrease`` if accounting for sample weights is required at splits. + +* Balance your dataset before training to prevent the tree from being biased + toward the classes that are dominant. Class balancing can be done by + sampling an equal number of samples from each class, or preferably by + normalizing the sum of the sample weights (``sample_weight``) for each + class to the same value. Also note that weight-based pre-pruning criteria, + such as ``min_weight_fraction_leaf``, will then be less biased toward + dominant classes than criteria that are not aware of the sample weights, + like ``min_samples_leaf``. + +* If the samples are weighted, it will be easier to optimize the tree + structure using weight-based pre-pruning criterion such as + ``min_weight_fraction_leaf``, which ensure that leaf nodes contain at least + a fraction of the overall sum of the sample weights. + +* All decision trees use ``np.float32`` arrays internally. + If training data is not in this format, a copy of the dataset will be made. + +* If the input matrix X is very sparse, it is recommended to convert to sparse + ``csc_matrix`` before calling fit and sparse ``csr_matrix`` before calling + predict. Training time can be orders of magnitude faster for a sparse + matrix input compared to a dense matrix when features have zero values in + most of the samples. .. _tree_algorithms: @@ -403,6 +412,10 @@ Tree algorithms: ID3, C4.5, C5.0 and CART What are all the various decision tree algorithms and how do they differ from each other? Which one is implemented in scikit-learn? +|details-start| +**Various decision tree algorithms** +|details-split| + ID3_ (Iterative Dichotomiser 3) was developed in 1986 by Ross Quinlan. The algorithm creates a multiway tree, finding for each node (i.e. in a greedy manner) the categorical feature that will yield the largest @@ -428,6 +441,8 @@ it differs in that it supports numerical target variables (regression) and does not compute rule sets. CART constructs binary trees using the feature and threshold that yield the largest information gain at each node. +|details-end| + scikit-learn uses an optimized version of the CART algorithm; however, the scikit-learn implementation does not support categorical variables for now. @@ -500,36 +515,39 @@ Log Loss or Entropy: H(Q_m) = - \sum_k p_{mk} \log(p_{mk}) +|details-start| +**Shannon entropy** +|details-split| -.. note:: +The entropy criterion computes the Shannon entropy of the possible classes. 
It +takes the class frequencies of the training data points that reached a given +leaf :math:`m` as their probability. Using the **Shannon entropy as tree node +splitting criterion is equivalent to minimizing the log loss** (also known as +cross-entropy and multinomial deviance) between the true labels :math:`y_i` +and the probabilistic predictions :math:`T_k(x_i)` of the tree model :math:`T` for class :math:`k`. - The entropy criterion computes the Shannon entropy of the possible classes. It - takes the class frequencies of the training data points that reached a given - leaf :math:`m` as their probability. Using the **Shannon entropy as tree node - splitting criterion is equivalent to minimizing the log loss** (also known as - cross-entropy and multinomial deviance) between the true labels :math:`y_i` - and the probalistic predictions :math:`T_k(x_i)` of the tree model :math:`T` for class :math:`k`. +To see this, first recall that the log loss of a tree model :math:`T` +computed on a dataset :math:`D` is defined as follows: - To see this, first recall that the log loss of a tree model :math:`T` - computed on a dataset :math:`D` is defined as follows: +.. math:: - .. math:: + \mathrm{LL}(D, T) = -\frac{1}{n} \sum_{(x_i, y_i) \in D} \sum_k I(y_i = k) \log(T_k(x_i)) - \mathrm{LL}(D, T) = -\frac{1}{n} \sum_{(x_i, y_i) \in D} \sum_k I(y_i = k) \log(T_k(x_i)) +where :math:`D` is a training dataset of :math:`n` pairs :math:`(x_i, y_i)`. - where :math:`D` is a training dataset of :math:`n` pairs :math:`(x_i, y_i)`. +In a classification tree, the predicted class probabilities within leaf nodes +are constant, that is: for all :math:`(x_i, y_i) \in Q_m`, one has: +:math:`T_k(x_i) = p_{mk}` for each class :math:`k`. - In a classification tree, the predicted class probabilities within leaf nodes - are constant, that is: for all :math:`(x_i, y_i) \in Q_m`, one has: - :math:`T_k(x_i) = p_{mk}` for each class :math:`k`. +This property makes it possible to rewrite :math:`\mathrm{LL}(D, T)` as the +sum of the Shannon entropies computed for each leaf of :math:`T` weighted by +the number of training data points that reached each leaf: - This property makes it possible to rewrite :math:`\mathrm{LL}(D, T)` as the - sum of the Shannon entropies computed for each leaf of :math:`T` weighted by - the number of training data points that reached each leaf: +.. math:: - .. math:: + \mathrm{LL}(D, T) = \sum_{m \in T} \frac{n_m}{n} H(Q_m) - \mathrm{LL}(D, T) = \sum_{m \in T} \frac{n_m}{n} H(Q_m) +|details-end| Regression criteria ------------------- @@ -572,6 +590,65 @@ Mean Absolute Error: Note that it fits much slower than the MSE criterion. +.. _tree_missing_value_support: + +Missing Values Support +====================== + +:class:`DecisionTreeClassifier` and :class:`DecisionTreeRegressor` +have built-in support for missing values when `splitter='best'` and criterion is +`'gini'`, `'entropy`', or `'log_loss'`, for classification or +`'squared_error'`, `'friedman_mse'`, or `'poisson'` for regression. + +For each potential threshold on the non-missing data, the splitter will evaluate +the split with all the missing values going to the left node or the right node. 
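The same mechanism applies to the regression tree. As a minimal sketch (not part of the patch above), assuming a scikit-learn release that includes this support and using purely illustrative data, a :class:`DecisionTreeRegressor` with the default ``splitter='best'`` and squared-error criterion can be fit directly on a feature containing ``np.nan``::

    import numpy as np
    from sklearn.tree import DecisionTreeRegressor

    X = np.array([[0.0], [1.0], [6.0], [np.nan]])
    y = [0.2, 0.3, 1.5, 1.4]

    # With splitter="best", each candidate threshold on the non-missing values
    # is evaluated twice: once with the missing samples sent to the left child
    # and once with them sent to the right child.
    reg = DecisionTreeRegressor(random_state=0).fit(X, y)

    # At predict time, a sample with a missing value follows the branch that
    # was chosen for missing values when the split was learned.
    reg.predict(np.array([[np.nan], [5.0]]))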
+ +Decisions are made as follows: + +- By default when predicting, the samples with missing values are classified + with the class used in the split found during training:: + + >>> from sklearn.tree import DecisionTreeClassifier + >>> import numpy as np + + >>> X = np.array([0, 1, 6, np.nan]).reshape(-1, 1) + >>> y = [0, 0, 1, 1] + + >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) + >>> tree.predict(X) + array([0, 0, 1, 1]) + +- If the criterion evaluation is the same for both nodes, + then the tie for missing value at predict time is broken by going to the + right node. The splitter also checks the split where all the missing + values go to one child and non-missing values go to the other:: + + >>> from sklearn.tree import DecisionTreeClassifier + >>> import numpy as np + + >>> X = np.array([np.nan, -1, np.nan, 1]).reshape(-1, 1) + >>> y = [0, 0, 1, 1] + + >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) + + >>> X_test = np.array([np.nan]).reshape(-1, 1) + >>> tree.predict(X_test) + array([1]) + +- If no missing values are seen during training for a given feature, then during + prediction missing values are mapped to the child with the most samples:: + + >>> from sklearn.tree import DecisionTreeClassifier + >>> import numpy as np + + >>> X = np.array([0, 1, 2, 3]).reshape(-1, 1) + >>> y = [0, 1, 1, 1] + + >>> tree = DecisionTreeClassifier(random_state=0).fit(X, y) + + >>> X_test = np.array([np.nan]).reshape(-1, 1) + >>> tree.predict(X_test) + array([1]) .. _minimal_cost_complexity_pruning: @@ -612,17 +689,21 @@ be pruned. This process stops when the pruned tree's minimal * :ref:`sphx_glr_auto_examples_tree_plot_cost_complexity_pruning.py` -.. topic:: References: +|details-start| +**References** +|details-split| + +.. [BRE] L. Breiman, J. Friedman, R. Olshen, and C. Stone. Classification + and Regression Trees. Wadsworth, Belmont, CA, 1984. - .. [BRE] L. Breiman, J. Friedman, R. Olshen, and C. Stone. Classification - and Regression Trees. Wadsworth, Belmont, CA, 1984. +* https://en.wikipedia.org/wiki/Decision_tree_learning - * https://en.wikipedia.org/wiki/Decision_tree_learning +* https://en.wikipedia.org/wiki/Predictive_analytics - * https://en.wikipedia.org/wiki/Predictive_analytics +* J.R. Quinlan. C4. 5: programs for machine learning. Morgan + Kaufmann, 1993. - * J.R. Quinlan. C4. 5: programs for machine learning. Morgan - Kaufmann, 1993. +* T. Hastie, R. Tibshirani and J. Friedman. Elements of Statistical + Learning, Springer, 2009. - * T. Hastie, R. Tibshirani and J. Friedman. Elements of Statistical - Learning, Springer, 2009. +|details-end| diff --git a/doc/modules/unsupervised_reduction.rst b/doc/modules/unsupervised_reduction.rst index 6e16886064cfc..90c80714c3131 100644 --- a/doc/modules/unsupervised_reduction.rst +++ b/doc/modules/unsupervised_reduction.rst @@ -31,7 +31,7 @@ capture well the variance of the original features. See :ref:`decompositions`. Random projections ------------------- -The module: :mod:`random_projection` provides several tools for data +The module: :mod:`~sklearn.random_projection` provides several tools for data reduction by random projections. See the relevant section of the documentation: :ref:`random_projection`. @@ -55,6 +55,5 @@ similarly. Note that if features have very different scaling or statistical properties, :class:`cluster.FeatureAgglomeration` may not be able to - capture the links between related features. Using a + capture the links between related features. 
Using a :class:`preprocessing.StandardScaler` can be useful in these settings. - diff --git a/doc/presentations.rst b/doc/presentations.rst index 2a465af8247a7..19fd09218b5fd 100644 --- a/doc/presentations.rst +++ b/doc/presentations.rst @@ -37,42 +37,42 @@ Videos `_ by `Gael Varoquaux`_ at ICML 2010 - A three minute video from a very early stage of scikit-learn, explaining the - basic idea and approach we are following. + A three minute video from a very early stage of scikit-learn, explaining the + basic idea and approach we are following. - `Introduction to statistical learning with scikit-learn `_ by `Gael Varoquaux`_ at SciPy 2011 - An extensive tutorial, consisting of four sessions of one hour. - The tutorial covers the basics of machine learning, - many algorithms and how to apply them using scikit-learn. The - material corresponding is now in the scikit-learn documentation - section :ref:`stat_learn_tut_index`. + An extensive tutorial, consisting of four sessions of one hour. + The tutorial covers the basics of machine learning, + many algorithms and how to apply them using scikit-learn. The + material corresponding is now in the scikit-learn documentation + section :ref:`stat_learn_tut_index`. - `Statistical Learning for Text Classification with scikit-learn and NLTK `_ (and `slides `_) by `Olivier Grisel`_ at PyCon 2011 - Thirty minute introduction to text classification. Explains how to - use NLTK and scikit-learn to solve real-world text classification - tasks and compares against cloud-based solutions. + Thirty minute introduction to text classification. Explains how to + use NLTK and scikit-learn to solve real-world text classification + tasks and compares against cloud-based solutions. - `Introduction to Interactive Predictive Analytics in Python with scikit-learn `_ by `Olivier Grisel`_ at PyCon 2012 - 3-hours long introduction to prediction tasks using scikit-learn. + 3-hours long introduction to prediction tasks using scikit-learn. - `scikit-learn - Machine Learning in Python `_ by `Jake Vanderplas`_ at the 2012 PyData workshop at Google - Interactive demonstration of some scikit-learn features. 75 minutes. + Interactive demonstration of some scikit-learn features. 75 minutes. - `scikit-learn tutorial `_ by `Jake Vanderplas`_ at PyData NYC 2012 - Presentation using the online tutorial, 45 minutes. + Presentation using the online tutorial, 45 minutes. -.. _Gael Varoquaux: http://gael-varoquaux.info +.. _Gael Varoquaux: https://gael-varoquaux.info .. _Jake Vanderplas: http://www.vanderplas.com .. _Olivier Grisel: https://twitter.com/ogrisel diff --git a/doc/related_projects.rst b/doc/related_projects.rst index e3c4477ff2306..e6d0bd83f0a16 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -21,9 +21,6 @@ enhance the functionality of scikit-learn's estimators. **Data formats** -- `Fast svmlight / libsvm file loader `_ - Fast and memory-efficient svmlight / libsvm file loader for Python. - - `sklearn_pandas `_ bridge for scikit-learn pipelines and pandas data frame with dedicated transformers. @@ -64,19 +61,20 @@ enhance the functionality of scikit-learn's estimators. It incorporates multiple modeling libraries under one API, and the objects that EvalML creates use an sklearn-compatible API. -**Experimentation frameworks** +**Experimentation and model registry frameworks** + +- `MLFlow `_ MLflow is an open source platform to manage the ML + lifecycle, including experimentation, reproducibility, deployment, and a central + model registry. 
- `Neptune `_ Metadata store for MLOps, - built for teams that run a lot of experiments.‌ It gives you a single + built for teams that run a lot of experiments. It gives you a single place to log, store, display, organize, compare, and query all your model building metadata. - `Sacred `_ Tool to help you configure, organize, log and reproduce experiments -- `REP `_ Environment for conducting data-driven - research in a consistent and reproducible way - - `Scikit-Learn Laboratory `_ A command-line wrapper around scikit-learn that makes it easy to run machine learning @@ -91,8 +89,10 @@ enhance the functionality of scikit-learn's estimators. debugging/inspecting machine learning models and explaining their predictions. -- `mlxtend `_ Includes model visualization - utilities. +- `sklearn-evaluation `_ + Machine learning model evaluation made easy: plots, tables, HTML reports, + experiment tracking and Jupyter notebook analysis. Visual analysis, model + selection, evaluation and diagnostics. - `yellowbrick `_ A suite of custom matplotlib visualizers for scikit-learn estimators to support visual feature @@ -115,6 +115,10 @@ enhance the functionality of scikit-learn's estimators. Scikit-learn pipelines to `ONNX `_ for interchange and prediction. +- `skops.io `__ A + persistence model more secure than pickle, which can be used instead of + pickle in most common cases. + - `sklearn2pmml `_ Serialization of a wide variety of scikit-learn estimators and transformers into PMML with the help of `JPMML-SkLearn `_ @@ -132,6 +136,25 @@ enhance the functionality of scikit-learn's estimators. Compiles tree-based ensemble models into C code for minimizing prediction latency. +- `micromlgen `_ + MicroML brings Machine Learning algorithms to microcontrollers. + Supports several scikit-learn classifiers by transpiling them to C code. + +- `emlearn `_ + Implements scikit-learn estimators in C99 for embedded devices and microcontrollers. + Supports several classifier, regression and outlier detection models. + +**Model throughput** + +- `Intel(R) Extension for scikit-learn `_ + Mostly on high end Intel(R) hardware, accelerates some scikit-learn models + for both training and inference under certain circumstances. This project is + maintained by Intel(R) and scikit-learn's maintainers are not involved in the + development of this project. Also note that in some cases using the tools and + estimators under ``scikit-learn-intelex`` would give different results than + ``scikit-learn`` itself. If you encounter issues while using this project, + make sure you report potential issues in their respective repositories. + Other estimators and tasks -------------------------- @@ -141,12 +164,40 @@ project. The following are projects providing interfaces similar to scikit-learn for additional learning algorithms, infrastructures and tasks. -**Structured learning** +**Time series and forecasting** + +- `Darts `_ Darts is a Python library for + user-friendly forecasting and anomaly detection on time series. It contains a variety + of models, from classics such as ARIMA to deep neural networks. The forecasting + models can all be used in the same way, using fit() and predict() functions, similar + to scikit-learn. + +- `sktime `_ A scikit-learn compatible + toolbox for machine learning with time series including time series + classification/regression and (supervised/panel) forecasting. + +- `skforecast `_ A python library + that eases using scikit-learn regressors as multi-step forecasters. 
It also works + with any regressor compatible with the scikit-learn API. + +- `tslearn `_ A machine learning library for + time series that offers tools for pre-processing and feature extraction as well as + dedicated models for clustering, classification and regression. + +**Gradient (tree) boosting** + +Note scikit-learn own modern gradient boosting estimators +:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and +:class:`~sklearn.ensemble.HistGradientBoostingRegressor`. -- `tslearn `_ A machine learning library for time series - that offers tools for pre-processing and feature extraction as well as dedicated models for clustering, classification and regression. +- `XGBoost `_ XGBoost is an optimized distributed + gradient boosting library designed to be highly efficient, flexible and portable. -- `sktime `_ A scikit-learn compatible toolbox for machine learning with time series including time series classification/regression and (supervised/panel) forecasting. +- `LightGBM `_ LightGBM is a gradient boosting + framework that uses tree based learning algorithms. It is designed to be distributed + and efficient. + +**Structured learning** - `HMMLearn `_ Implementation of hidden markov models that was previously part of scikit-learn. @@ -162,16 +213,8 @@ and tasks. (`CRFsuite `_ wrapper with sklearn-like API). -**Deep neural networks etc.** - -- `nolearn `_ A number of wrappers and - abstractions around existing neural network libraries -- `Keras `_ High-level API for - TensorFlow with a scikit-learn inspired API. - -- `lasagne `_ A lightweight library to - build and train neural networks in Theano. +**Deep neural networks etc.** - `skorch `_ A scikit-learn compatible neural network library that wraps PyTorch. @@ -185,6 +228,14 @@ and tasks. - `Flower `_ A friendly federated learning framework with a unified approach that can federate any workload, any ML framework, and any programming language. +**Privacy Preserving Machine Learning** + +- `Concrete ML `_ A privacy preserving + ML framework built on top of `Concrete + `_, with bindings to traditional ML + frameworks, thanks to fully homomorphic encryption. APIs of so-called + Concrete ML built-in models are very close to scikit-learn APIs. + **Broad scope** - `mlxtend `_ Includes a number of additional @@ -195,9 +246,6 @@ and tasks. **Other regression and classification** -- `xgboost `_ Optimised gradient boosted decision - tree library. - - `ML-Ensemble `_ Generalized ensemble learning (stacking, blending, subsemble, deep ensembles, etc.). @@ -208,10 +256,6 @@ and tasks. - `py-earth `_ Multivariate adaptive regression splines -- `Kernel Regression `_ - Implementation of Nadaraya-Watson kernel regression with automatic bandwidth - selection - - `gplearn `_ Genetic Programming for symbolic regression tasks. @@ -221,8 +265,6 @@ and tasks. - `seglearn `_ Time series and sequence learning using sliding window segmentation. -- `libOPF `_ Optimal path forest classifier - - `fastFM `_ Fast factorization machine implementation compatible with scikit-learn @@ -242,6 +284,7 @@ and tasks. - `hdbscan `_ HDBSCAN and Robust Single Linkage clustering algorithms for robust variable density clustering. + As of scikit-learn version 1.3.0, there is :class:`~sklearn.cluster.HDBSCAN`. - `spherecluster `_ Spherical K-means and mixture of von Mises Fisher clustering routines for data on the @@ -252,6 +295,8 @@ and tasks. - `categorical-encoding `_ A library of sklearn compatible categorical variable encoders. 
+ As of scikit-learn version 1.3.0, there is + :class:`~sklearn.preprocessing.TargetEncoder`. - `imbalanced-learn `_ Various @@ -285,7 +330,7 @@ Other packages useful for data analysis and machine learning. statistical models. More focused on statistical tests and less on prediction than scikit-learn. -- `PyMC `_ Bayesian statistical models and +- `PyMC `_ Bayesian statistical models and fitting algorithms. - `Seaborn `_ Visualization library based on @@ -307,10 +352,7 @@ Recommendation Engine packages - `OpenRec `_ TensorFlow-based neural-network inspired recommendation algorithms. -- `Spotlight `_ Pytorch-based - implementation of deep recommender models. - -- `Surprise Lib `_ Library for explicit feedback +- `Surprise Lib `_ Library for explicit feedback datasets. Domain specific packages @@ -331,9 +373,6 @@ Domain specific packages - `AstroML `_ Machine learning for astronomy. -- `MSMBuilder `_ Machine learning for protein - conformational dynamics time series. - Translations of scikit-learn documentation ------------------------------------------ @@ -356,10 +395,11 @@ and promote community efforts. (`source `__) - `Spanish translation `_ (`source `__) +- `Korean translation `_ + (`source `__) .. rubric:: Footnotes .. [#f1] following `linux documentation Disclaimer `__ - diff --git a/doc/roadmap.rst b/doc/roadmap.rst index be3607cf542fb..3d6cda2d6c969 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -1,5 +1,3 @@ -.. _roadmap: - .. |ss| raw:: html @@ -8,6 +6,8 @@ +.. _roadmap: + Roadmap ======= diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py index 89af4bbee6670..89d7077bce2b5 100755 --- a/doc/sphinxext/allow_nan_estimators.py +++ b/doc/sphinxext/allow_nan_estimators.py @@ -1,11 +1,12 @@ -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _construct_instance -from sklearn.utils._testing import SkipTest -from docutils import nodes from contextlib import suppress +from docutils import nodes from docutils.parsers.rst import Directive +from sklearn.utils import all_estimators +from sklearn.utils._testing import SkipTest +from sklearn.utils.estimator_checks import _construct_instance + class AllowNanEstimators(Directive): @staticmethod @@ -45,7 +46,6 @@ def run(self): def setup(app): - app.add_directive("allow_nan_estimators", AllowNanEstimators) return { diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py index f851a12ec69ea..9f117b07fa6a3 100644 --- a/doc/sphinxext/doi_role.py +++ b/doc/sphinxext/doi_role.py @@ -1,22 +1,20 @@ -# -*- coding: utf-8 -*- """ - doilinks - ~~~~~~~~ - Extension to add links to DOIs. With this extension you can use e.g. - :doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will - create a link to a DOI resolver - (``https://doi.org/10.1016/S0022-2836(05)80360-2``). - The link caption will be the raw DOI. - You can also give an explicit caption, e.g. - :doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`. - - :copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by - the Sphinx team. - :license: BSD. +doilinks +~~~~~~~~ +Extension to add links to DOIs. With this extension you can use e.g. +:doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will +create a link to a DOI resolver +(``https://doi.org/10.1016/S0022-2836(05)80360-2``). +The link caption will be the raw DOI. +You can also give an explicit caption, e.g. +:doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`. 
+ +:copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by + the Sphinx team. +:license: BSD. """ from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py index 3992d814b825e..2cd1fbd83af47 100644 --- a/doc/sphinxext/github_link.py +++ b/doc/sphinxext/github_link.py @@ -1,9 +1,9 @@ -from operator import attrgetter import inspect -import subprocess import os +import subprocess import sys from functools import partial +from operator import attrgetter REVISION_CMD = "git rev-parse --short HEAD" @@ -26,10 +26,10 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): >>> _linkcode_resolve('py', {'module': 'tty', ... 'fullname': 'setraw'}, ... package='tty', - ... url_fmt='http://hg.python.org/cpython/file/' + ... url_fmt='https://hg.python.org/cpython/file/' ... '{revision}/Lib/{package}/{path}#L{lineno}', ... revision='xxxx') - 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' + 'https://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' """ if revision is None: diff --git a/doc/sphinxext/sphinx_issues.py b/doc/sphinxext/sphinx_issues.py index aa33a6f38e762..206359a1bd703 100644 --- a/doc/sphinxext/sphinx_issues.py +++ b/doc/sphinxext/sphinx_issues.py @@ -18,6 +18,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ + import re from docutils import nodes, utils @@ -75,7 +76,6 @@ def cve_role(name, rawtext, text, lineno, inliner, options=None, content=None): class IssueRole(object): - EXTERNAL_REPO_REGEX = re.compile(r"^(\w+)/(.+)([#@])([\w]+)$") def __init__( diff --git a/doc/support.rst b/doc/support.rst index 751833fa57e5d..be9b32b60a9c8 100644 --- a/doc/support.rst +++ b/doc/support.rst @@ -2,96 +2,120 @@ Support ======= -There are several ways to get in touch with the developers. +There are several channels to connect with scikit-learn developers for assistance, feedback, or contributions. +**Note**: Communications on all channels should respect our `Code of Conduct `_. -.. _mailing_lists: -Mailing List -============ +.. _announcements_and_notification: -- The main mailing list is `scikit-learn - `_. +Mailing Lists +============= -- There is also a commit list `scikit-learn-commits - `_, - where updates to the main repository and test failures get notified. +- **Main Mailing List**: Join the primary discussion + platform for scikit-learn at `scikit-learn Mailing List + `_. +- **Commit Updates**: Stay informed about repository + updates and test failures on the `scikit-learn-commits list + `_. .. _user_questions: -User questions +User Questions ============== -- Some scikit-learn developers support users on StackOverflow using - the `[scikit-learn] `_ +If you have questions, this is our general workflow. + +- **Stack Overflow**: Some scikit-learn developers support users using the + `[scikit-learn] `_ tag. -- For general theoretical or methodological Machine Learning questions - `stack exchange `_ is probably a more - suitable venue. +- **General Machine Learning Queries**: For broader machine learning + discussions, visit `Stack Exchange `_. + +When posting questions: + +- Please use a descriptive question in the title field (e.g. no "Please + help with scikit-learn!" as this is not a question) + +- Provide detailed context, expected results, and actual observations. + +- Include code and data snippets (preferably minimalistic scripts, + up to ~20 lines). 
-In both cases please use a descriptive question in the title field (e.g. -no "Please help with scikit-learn!" as this is not a question) and put -details on what you tried to achieve, what were the expected results and -what you observed instead in the details field. +- Describe your data and preprocessing steps, including sample size, + feature types (categorical or numerical), and the target for supervised + learning tasks (classification type or regression). -Code and data snippets are welcome. Minimalistic (up to ~20 lines long) -reproduction script very helpful. +**Note**: Avoid asking user questions on the bug tracker to keep +the focus on development. -Please describe the nature of your data and how you preprocessed it: -what is the number of samples, what is the number and type of features -(i.d. categorical or numerical) and for supervised learning tasks, -what target are your trying to predict: binary, multiclass (1 out of -``n_classes``) or multilabel (``k`` out of ``n_classes``) classification -or continuous variable regression. +- `GitHub Discussions `_ + Usage questions such as methodological -User questions should **not be asked on the bug tracker**, as it crowds -the list of issues and makes the development of the project harder. +- `Stack Overflow `_ + Programming/user questions with `[scikit-learn]` tag + +- `GitHub Bug Tracker `_ + Bug reports - Please do not ask usage questions on the issue tracker. + +- `Discord Server `_ + Current pull requests - Post any specific PR-related questions on your PR, + and you can share a link to your PR on this server. .. _bug_tracker: -Bug tracker +Bug Tracker =========== -If you think you've encountered a bug, please report it to the issue tracker: +Encountered a bug? Report it on our `issue tracker +`_ + +Include in your report: -https://github.com/scikit-learn/scikit-learn/issues +- Steps or scripts to reproduce the bug. -Don't forget to include: +- Expected and observed outcomes. - - steps (or better script) to reproduce, +- Python or gdb tracebacks, if applicable. - - expected outcome, +- The ideal bug report contains a :ref:`short reproducible code snippet + `, this way anyone can try to reproduce the bug easily. - - observed outcome or Python (or gdb) tracebacks +- If your snippet is longer than around 50 lines, please link to a + `gist `_ or a github repo. -To help developers fix your bug faster, please link to a https://gist.github.com -holding a standalone minimalistic python script that reproduces your bug and -optionally a minimalistic subsample of your dataset (for instance, exported -as CSV files using ``numpy.savetxt``). +**Tip**: Gists are Git repositories; you can push data files to them using Git. -Note: Gists are Git cloneable repositories and thus you can use Git to -push datafiles to them. +.. _social_media: +Social Media +============ + +scikit-learn has presence on various social media platforms to share +updates with the community. The platforms are not monitored for user +questions. .. _gitter: Gitter ====== -Some developers like to hang out on scikit-learn Gitter room: -https://gitter.im/scikit-learn/scikit-learn. - +**Note**: The scikit-learn Gitter room is no longer an active community. +For live discussions and support, please refer to the other channels +mentioned in this document. .. _documentation_resources: -Documentation resources +Documentation Resources ======================= -This documentation is relative to |release|. Documentation for -other versions can be found `here -`__. 
+This documentation is for |release|. Find documentation for other versions +`here `__. -Printable pdf documentation for old versions can be found `here +Older versions' printable PDF documentation is available `here `_. +Building the PDF documentation is no longer supported in the website, +but you can still generate it locally by following the +:ref:`building documentation instructions `. diff --git a/doc/templates/class.rst b/doc/templates/class.rst index 79ff2cf807794..1e98be4099b73 100644 --- a/doc/templates/class.rst +++ b/doc/templates/class.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/class_with_call.rst b/doc/templates/class_with_call.rst index f98b7dbbf6578..bc1567709c9d3 100644 --- a/doc/templates/class_with_call.rst +++ b/doc/templates/class_with_call.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}=============== diff --git a/doc/templates/deprecated_class.rst b/doc/templates/deprecated_class.rst index 857e2c28ce1da..5c31936f6fc36 100644 --- a/doc/templates/deprecated_class.rst +++ b/doc/templates/deprecated_class.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/deprecated_class_with_call.rst b/doc/templates/deprecated_class_with_call.rst index a04efcb80be07..072a31112be50 100644 --- a/doc/templates/deprecated_class_with_call.rst +++ b/doc/templates/deprecated_class_with_call.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}=============== diff --git a/doc/templates/deprecated_class_without_init.rst b/doc/templates/deprecated_class_without_init.rst index c019992493610..a26afbead5451 100644 --- a/doc/templates/deprecated_class_without_init.rst +++ b/doc/templates/deprecated_class_without_init.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/deprecated_function.rst b/doc/templates/deprecated_function.rst index 6d13ac6aca2de..ead5abec27076 100644 --- a/doc/templates/deprecated_function.rst +++ b/doc/templates/deprecated_function.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. 
+ :mod:`{{module}}`.{{objname}} {{ underline }}==================== diff --git a/doc/templates/display_all_class_methods.rst b/doc/templates/display_all_class_methods.rst new file mode 100644 index 0000000000000..b179473cf841e --- /dev/null +++ b/doc/templates/display_all_class_methods.rst @@ -0,0 +1,19 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + +.. include:: {{module}}.{{objname}}.examples +.. include:: {{module}}.{{objname}}.from_estimator.examples +.. include:: {{module}}.{{objname}}.from_predictions.examples + +.. raw:: html + +
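The ``display_all_class_methods.rst`` template added above pulls in example listings for both the ``from_estimator`` and ``from_predictions`` constructors of a display class. As a rough illustration (not part of the patch), assuming matplotlib is available and taking :class:`~sklearn.metrics.ConfusionMatrixDisplay` as an example of a display class that exposes both constructors, the two entry points the template documents are used like this::

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import ConfusionMatrixDisplay

    X, y = make_classification(random_state=0)
    clf = LogisticRegression(max_iter=1000).fit(X, y)

    # Build the confusion-matrix plot directly from a fitted estimator ...
    ConfusionMatrixDisplay.from_estimator(clf, X, y)

    # ... or from precomputed predictions.
    ConfusionMatrixDisplay.from_predictions(y, clf.predict(X))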
diff --git a/doc/templates/display_only_from_estimator.rst b/doc/templates/display_only_from_estimator.rst new file mode 100644 index 0000000000000..9981910dc8be7 --- /dev/null +++ b/doc/templates/display_only_from_estimator.rst @@ -0,0 +1,18 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + +.. include:: {{module}}.{{objname}}.examples +.. include:: {{module}}.{{objname}}.from_estimator.examples + +.. raw:: html + +
diff --git a/doc/templates/function.rst b/doc/templates/function.rst index f4b11eda770e4..93d368ecfe6d5 100644 --- a/doc/templates/function.rst +++ b/doc/templates/function.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}==================== diff --git a/doc/templates/index.html b/doc/templates/index.html index 6fed789140124..74816a4b473d3 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -42,9 +42,10 @@

[doc/templates/index.html: only the text content of the changed markup is recoverable]

 Classification card: "Identifying which category an object belongs to."
 Applications: Spam detection, image recognition.
 Algorithms:
-  SVM,
+  Gradient boosting,
   nearest neighbors,
   random forest,
+  logistic regression,
   and more...

@@ -62,14 +63,15 @@
 Regression card: "Predicting a continuous-valued attribute associated with an object."
 Applications: Drug response, Stock prices.
 Algorithms:
-  SVR,
+  Gradient boosting,
   nearest neighbors,
   random forest,
+  ridge,
   and more...

@@ -83,8 +85,9 @@
 Clustering card: "Applications: Customer segmentation, Grouping experiment outcomes"
 Algorithms:
   k-Means,
-  spectral clustering,
-  mean-shift,
+  HDBSCAN,
+  hierarchical clustering,
   and more...

@@ -164,60 +167,40 @@
 News:
-  On-going development: What's new (Changelog)
+  On-going development: scikit-learn 1.6 (Changelog)
-  October 2022. scikit-learn 1.1.3 is available for download (Changelog).
-  August 2022. scikit-learn 1.1.2 is available for download (Changelog).
-  May 2022. scikit-learn 1.1.1 is available for download (Changelog).
-  May 2022. scikit-learn 1.1.0 is available for download (Changelog).
-  December 2021. scikit-learn 1.0.2 is available for download (Changelog).
-  October 2021. scikit-learn 1.0.1 is available for download (Changelog).
-  September 2021. scikit-learn 1.0 is available for download (Changelog).
-  April 2021. scikit-learn 0.24.2 is available for download (Changelog).
-  January 2021. scikit-learn 0.24.1 is available for download (Changelog).
-  December 2020. scikit-learn 0.24.0 is available for download (Changelog).
-  August 2020. scikit-learn 0.23.2 is available for download (Changelog).
-  May 2020. scikit-learn 0.23.1 is available for download (Changelog).
-  May 2020. scikit-learn 0.23.0 is available for download (Changelog).
-  Scikit-learn from 0.23 requires Python 3.6 or newer.
-  March 2020. scikit-learn 0.22.2 is available for download (Changelog).
-  January 2020. scikit-learn 0.22.1 is available for download (Changelog).
-  December 2019. scikit-learn 0.22 is available for download (Changelog and Release Highlights).
+  May 2024. scikit-learn 1.5.0 is available for download (Changelog).
+  April 2024. scikit-learn 1.4.2 is available for download (Changelog).
+  February 2024. scikit-learn 1.4.1.post1 is available for download (Changelog).
+  January 2024. scikit-learn 1.4.0 is available for download (Changelog).
+  All releases: What's new (Changelog)
 Community

@@ -268,15 +251,15 @@
 Who uses scikit-learn?
 scikit-learn development and maintenance are financially supported by
 [sponsor logo list updated; image markup not recoverable]
diff --git a/doc/themes/scikit-learn-modern/javascript.html b/doc/themes/scikit-learn-modern/javascript.html
index fc0dca1040e03..be4cf26073441 100644
--- a/doc/themes/scikit-learn-modern/javascript.html
+++ b/doc/themes/scikit-learn-modern/javascript.html
@@ -1,4 +1,4 @@
-{% if theme_google_analytics|tobool %}
+{% if theme_legacy_google_analytics|tobool %}
 [script markup not recoverable]
 {% endif %}
+{% if theme_analytics|tobool %}
+[script markup not recoverable]
+{% endif %}
+[additional script markup added; not recoverable]
diff --git a/doc/themes/scikit-learn-modern/layout.html b/doc/themes/scikit-learn-modern/layout.html
index a4b9733b68709..c95184d42c671 100644
--- a/doc/themes/scikit-learn-modern/layout.html
+++ b/doc/themes/scikit-learn-modern/layout.html
@@ -9,8 +9,9 @@
 {%- set lang_attr = 'en' %}
-[html/meta markup not recoverable]
+[html/meta markup not recoverable]
 {{ metatags }}
@@ -19,10 +20,10 @@
 {% block htmltitle %}
 [title markup not recoverable]
 {% endblock %}
-{% if favicon %}
+{% if favicon_url %}
 [favicon link markup not recoverable]
 {% endif %}
@@ -33,9 +34,10 @@
 {%- endif %}
 {%- endfor %}
-[stylesheet/script markup not recoverable]
+[stylesheet/script markup not recoverable]
 {%- block extrahead %}
 {% endblock %}
@@ -46,16 +48,6 @@
-[navigation markup removed; not recoverable]
 {%- if prev %}
 Prev
@@ -77,7 +69,7 @@
 {%- endif %}
diff --git a/doc/themes/scikit-learn-modern/nav.html b/doc/themes/scikit-learn-modern/nav.html
index c30c304116d88..14d82e2e46e95 100644
--- a/doc/themes/scikit-learn-modern/nav.html
+++ b/doc/themes/scikit-learn-modern/nav.html
@@ -27,6 +27,7 @@
    ('Support', pathto('support'), ''),
    ('Related packages', pathto('related_projects'), ''),
    ('Roadmap', pathto('roadmap'), ''),
+   ('Governance', pathto('governance'), ''),
    ('About us', pathto('about'), ''),
    ('GitHub', 'https://github.com/scikit-learn/scikit-learn', ''),
    ('Other Versions and Download', 'https://scikit-learn.org/dev/versions.html', '')]
@@ -34,11 +35,11 @@