# This workflow runs the linters on every pull request (`lint` job) and stores
# their output as an artifact. The `comment` job below then downloads that
# artifact and runs `build_tools/get_comment.py` to post or update the bot
# comment on the pull request.
name: linter

on:
  - pull_request_target

jobs:
  lint:
    runs-on: ubuntu-latest

    # NOTE(security): `pull_request_target` runs in the context of the base
    # repository, while this job checks out and executes code coming from the
    # pull request head (`build_tools/shared.sh`, `build_tools/linting.sh`).
    # Keep the token permission-less and never reference secrets in this job.
    # setting any permission will set everything else to none for GITHUB_TOKEN
    permissions:
      pull-requests: none

    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          # Do not leave the token in the local git config where the
          # untrusted code executed by the next steps could read it.
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          # Quoted so that YAML does not parse the version as a float.
          python-version: "3.11"

      - name: Install dependencies
        run: |
          source build_tools/shared.sh
          # Include pytest compatibility with mypy
          pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint

      - name: Run linting
        # Both stdout and stderr are captured; the file is parsed by
        # build_tools/get_comment.py in the `comment` job.
        run: ./build_tools/linting.sh &> /tmp/linting_output.txt

      - name: Upload Artifact
        # Upload the log even when the linting step failed.
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: lint-log
          path: /tmp/linting_output.txt
          retention-days: 1

  comment:
    needs: lint
    # Run even when the `lint` job failed: that is when a comment is needed.
    if: always()
    runs-on: ubuntu-latest

    # We need these permissions to be able to post / update comments
    permissions:
      pull-requests: write
      issues: write

    steps:
      # No `ref` is given here, unlike in the `lint` job, so this job does not
      # check out the pull request head commit.
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: python -m pip install requests

      - name: Download artifact
        id: download-artifact
        uses: actions/download-artifact@v3
        with:
          name: lint-log

      - name: Print log
        run: cat linting_output.txt

      - name: Process Comments
        id: process-comments
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          RUN_ID: ${{ github.run_id }}
          LOG_FILE: linting_output.txt
        run: python ./build_tools/get_comment.py
# This script builds the comment that the linting bot posts on a pull request
# from the log written by `build_tools/linting.sh`, and creates or updates that
# comment through the GitHub REST API. It is run by the `comment` job of the
# `linter` GitHub Actions workflow.
#
# The start / end markers below must be kept in sync with the messages printed
# by `build_tools/linting.sh`.

import os
import sys

# Seconds to wait for the GitHub API before giving up, so that a stalled
# connection fails the job instead of hanging it.
_REQUEST_TIMEOUT = 30

# One entry per step of `build_tools/linting.sh`, in the order in which the
# sections appear in the comment:
# (start marker, end marker, section title, section message).
_LINT_STEPS = (
    (
        "### Running black ###",
        "Problems detected by black",
        "`black`",
        (
            "`black` detected issues. Please run `black .` locally and push "
            "the changes. Here you can see the detected issues. Note that "
            "running black might also fix some of the issues which might be "
            "detected by `flake8`."
        ),
    ),
    (
        "### Running flake8 ###",
        "Problems detected by flake8",
        "`flake8`",
        (
            "`flake8` detected issues. Please fix them locally and push the changes. "
            "Here you can see the detected issues."
        ),
    ),
    (
        "### Running mypy ###",
        "Problems detected by mypy",
        "`mypy`",
        (
            "`mypy` detected issues. Please fix them locally and push the changes. "
            "Here you can see the detected issues."
        ),
    ),
    (
        "### Running cython-lint ###",
        "Problems detected by cython-lint",
        "`cython-lint`",
        (
            "`cython-lint` detected issues. Please fix them locally and push "
            "the changes. Here you can see the detected issues."
        ),
    ),
    (
        "### Checking for bad deprecation order ###",
        "Problems detected by deprecation order check",
        "Deprecation Order",
        (
            "Deprecation order check detected issues. Please fix them locally and "
            "push the changes. Here you can see the detected issues."
        ),
    ),
    (
        "### Checking for default doctest directives ###",
        "Problems detected by doctest directive check",
        "Doctest Directives",
        (
            "doctest directive check detected issues. Please fix them locally and "
            "push the changes. Here you can see the detected issues."
        ),
    ),
    (
        "### Checking for joblib imports ###",
        "Problems detected by joblib import check",
        "Joblib Imports",
        (
            "`joblib` import check detected issues. Please fix them locally and "
            "push the changes. Here you can see the detected issues."
        ),
    ),
)


def _extract_step_output(log, start, end):
    """Return the part of `log` between `start` and the next `end`, or None.

    None is returned when `start` is not in `log` or when `end` does not occur
    after it, i.e. when the excerpt cannot be located reliably.
    """
    start_idx = log.find(start)
    if start_idx == -1:
        return None
    body_start = start_idx + len(start)
    # Only look for the end marker after the start marker, so that an earlier
    # occurrence of the same text does not truncate the excerpt.
    end_idx = log.find(end, body_start)
    if end_idx == -1:
        return None
    # Skip the character right after the start marker and drop the one right
    # before the end marker (the newlines printed around the markers).
    return log[body_start + 1 : end_idx - 1]


def get_step_message(log, start, end, title, message, details):
    """Get the message for a specific test.

    Parameters
    ----------
    log : str
        The log of the linting job.

    start : str
        The string that marks the start of the test.

    end : str
        The string that marks the end of the test. It is only present in the
        log when the test detected problems.

    title : str
        The title for this section.

    message : str
        The message to be added at the beginning of the section.

    details : bool
        Whether to add the details of each step.

    Returns
    -------
    message : str
        The message to be added to the comment. It is empty when `end` is not
        in `log`. The collapsible details section is omitted when the output
        of the step cannot be located between `start` and `end`.
    """
    if end not in log:
        return ""
    res = (
        "-----------------------------------------------\n"
        + f"### {title}\n\n"
        + message
        + "\n\n"
    )
    if details:
        excerpt = _extract_step_output(log, start, end)
        if excerpt is not None:
            # Collapsible section holding the raw linter output.
            res += "<details>\n\n```\n" + excerpt + "\n```\n\n</details>\n\n"
    return res


def get_message(log_file, repo, run_id, details):
    """Build the body of the comment from the linting log.

    Parameters
    ----------
    log_file : str
        Path to the file containing the output of the linting script.

    repo : str
        The repository, in the form of "org/repo".

    run_id : str
        The id of the workflow run, used to link to the `lint` job.

    details : bool
        Whether to include the output of each failing step.

    Returns
    -------
    message : str
        The success message when no step reported problems, otherwise a
        summary with one section per failing step.
    """
    # Undecodable bytes in the linter output must not prevent commenting.
    with open(log_file, encoding="utf-8", errors="replace") as f:
        log = f.read()

    message = "".join(
        get_step_message(
            log, start=start, end=end, title=title, message=text, details=details
        )
        for start, end, title, text in _LINT_STEPS
    )

    if not message:
        # No step reported problems.
        return (
            "## Linting Passed\n"
            "All linting checks passed. Your pull request is in excellent shape! ☀️"
        )

    return (
        "## Linting issues\n\n"
        "This PR is introducing linting issues. Here's a summary of the issues. "
        "Note that you can avoid having linting issues by enabling `pre-commit` "
        "hooks. Instructions to enable them can be found [here]("
        "https://scikit-learn.org/dev/developers/contributing.html#how-to-contribute)."
        "\n\n"
        "You can see the details of the linting issues under the `lint` job [here]"
        f"(https://github.com/{repo}/actions/runs/{run_id})\n\n"
        + message
    )


def get_headers(token):
    """Get the headers for the GitHub API."""
    return {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": "2022-11-28",
    }


def find_lint_bot_comments(repo, token, pr_number):
    """Get the comment from the linting bot.

    Returns the first comment posted by `github-actions[bot]` whose body
    contains one of the linting bot messages, or None if there is none.

    Raises
    ------
    RuntimeError
        If more than 25 comments were returned and none of them is from the
        linting bot.
    """
    # Imported here so that the message-building helpers above can be imported
    # without `requests` being installed.
    import requests

    # repo is in the form of "org/repo"
    # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments  # noqa
    response = requests.get(
        f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments",
        headers=get_headers(token),
        timeout=_REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    all_comments = response.json()

    failed_comment = "This PR is introducing linting issues. Here's a summary of the"
    success_comment = (
        "All linting checks passed. Your pull request is in excellent shape"
    )

    # Find all comments that match the linting bot, and return the first one.
    # There should always be only one such comment, or none, if the PR is
    # just created.
    comments = [
        comment
        for comment in all_comments
        if comment["user"]["login"] == "github-actions[bot]"
        and (failed_comment in comment["body"] or success_comment in comment["body"])
    ]

    # NOTE(review): the threshold is 25 although the page size mentioned below
    # is 30 -- presumably a safety margin; confirm.
    if len(all_comments) > 25 and not comments:
        # By default the API returns the first 30 comments. If we can't find the
        # comment created by the bot in those, then we raise and we skip creating
        # a comment in the first place.
        raise RuntimeError("Comment not found in the first 30 comments.")

    return comments[0] if comments else None


def create_or_update_comment(comment, message, repo, pr_number, token):
    """Create a new comment or update existing one.

    `comment` is the existing bot comment as returned by
    `find_lint_bot_comments`, or None to create a new one. Raises
    `requests.HTTPError` if the GitHub API rejects the request.
    """
    # Imported here so that the message-building helpers above can be imported
    # without `requests` being installed.
    import requests

    # repo is in the form of "org/repo"
    if comment is not None:
        print("updating existing comment")
        # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment  # noqa
        response = requests.patch(
            f"https://api.github.com/repos/{repo}/issues/comments/{comment['id']}",
            headers=get_headers(token),
            json={"body": message},
            timeout=_REQUEST_TIMEOUT,
        )
    else:
        print("creating new comment")
        # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment  # noqa
        response = requests.post(
            f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments",
            headers=get_headers(token),
            json={"body": message},
            timeout=_REQUEST_TIMEOUT,
        )

    response.raise_for_status()


def main():
    """Read the configuration from the environment and post the comment."""
    import requests

    # `.get` so that a missing variable reaches the explicit error below
    # instead of raising a bare KeyError.
    repo = os.environ.get("GITHUB_REPOSITORY")
    token = os.environ.get("GITHUB_TOKEN")
    pr_number = os.environ.get("PR_NUMBER")
    log_file = os.environ.get("LOG_FILE")
    run_id = os.environ.get("RUN_ID")

    if not repo or not token or not pr_number or not log_file or not run_id:
        raise ValueError(
            "One of the following environment variables is not set: "
            "GITHUB_REPOSITORY, GITHUB_TOKEN, PR_NUMBER, LOG_FILE, RUN_ID"
        )

    try:
        comment = find_lint_bot_comments(repo, token, pr_number)
    except RuntimeError:
        print("Comment not found in the first 30 comments. Skipping!")
        sys.exit(0)

    message = get_message(log_file, repo=repo, run_id=run_id, details=True)
    try:
        create_or_update_comment(
            comment=comment,
            message=message,
            repo=repo,
            pr_number=pr_number,
            token=token,
        )
    except requests.HTTPError:
        # The above fails if the message is too long. In that case, we
        # try again without the details.
        message = get_message(log_file, repo=repo, run_id=run_id, details=False)
        create_or_update_comment(
            comment=comment,
            message=message,
            repo=repo,
            pr_number=pr_number,
            token=token,
        )
    print(message)


if __name__ == "__main__":
    main()
#!/bin/bash

# Run every linting step and report the result of each one.
#
# Each step prints a "### ... ###" header before running and a
# "Problems detected by ..." line when it fails. These messages are parsed by
# `build_tools/get_comment.py` to build the pull request comment.
#
# Note that any change in this file, adding or removing steps or changing the
# printed messages, should be also reflected in the `get_comment.py` file.

# This script shouldn't exit if a command / pipeline fails
set +e
# pipefail is necessary to propagate exit codes
set -o pipefail

# Set to 1 as soon as one step fails; decides the exit code of the script.
global_status=0

echo -e "### Running black ###\n"
black --check --diff .
status=$?

if [[ $status -eq 0 ]]
then
    echo -e "No problem detected by black\n"
else
    echo -e "Problems detected by black, please run black and commit the result\n"
    global_status=1
fi

echo -e "### Running flake8 ###\n"
flake8 --show-source .
status=$?
if [[ $status -eq 0 ]]
then
    echo -e "No problem detected by flake8\n"
else
    echo -e "Problems detected by flake8, please fix them\n"
    global_status=1
fi

echo -e "### Running mypy ###\n"
mypy sklearn/
status=$?
if [[ $status -eq 0 ]]
then
    echo -e "No problem detected by mypy\n"
else
    echo -e "Problems detected by mypy, please fix them\n"
    global_status=1
fi

echo -e "### Running cython-lint ###\n"
cython-lint sklearn/
status=$?
if [[ $status -eq 0 ]]
then
    echo -e "No problem detected by cython-lint\n"
else
    echo -e "Problems detected by cython-lint, please fix them\n"
    global_status=1
fi

# For docstrings and warnings of deprecated attributes to be rendered
# properly, the property decorator must come before the deprecated decorator
# (else they are treated as functions)

echo -e "### Checking for bad deprecation order ###\n"
bad_deprecation_property_order="$(git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated")"

if [ -n "$bad_deprecation_property_order" ]
then
    echo "property decorator should come before deprecated decorator"
    echo "found the following occurrences:"
    # Quoted so that the occurrences stay one per line and are not
    # glob-expanded.
    echo "$bad_deprecation_property_order"
    echo -e "\nProblems detected by deprecation order check\n"
    global_status=1
else
    echo -e "No problems detected related to deprecation order\n"
fi

# Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE

echo -e "### Checking for default doctest directives ###\n"
doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")"

if [ -n "$doctest_directive" ]
then
    echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:"
    echo "$doctest_directive"
    echo -e "\nProblems detected by doctest directive check\n"
    global_status=1
else
    echo -e "No problems detected related to doctest directives\n"
fi

# Check for joblib.delayed and joblib.Parallel imports

echo -e "### Checking for joblib imports ###\n"
# Both joblib checks are reported under a single section.
joblib_status=0
joblib_delayed_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")"
if [ -n "$joblib_delayed_import" ]; then
    echo "Use from sklearn.utils.parallel import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:"
    echo "$joblib_delayed_import"
    joblib_status=1
fi
joblib_Parallel_import="$(git grep -l -A 10 -E "joblib import.+Parallel" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")"
if [ -n "$joblib_Parallel_import" ]; then
    echo "Use from sklearn.utils.parallel import Parallel instead of joblib Parallel. The following files contains imports to joblib.Parallel:"
    echo "$joblib_Parallel_import"
    joblib_status=1
fi

if [[ $joblib_status -eq 0 ]]
then
    echo -e "No problems detected related to joblib imports\n"
else
    echo -e "\nProblems detected by joblib import check\n"
    global_status=1
fi

if [[ $global_status -eq 1 ]]
then
    echo -e "Linting failed\n"
    exit 1
else
    echo -e "Linting passed\n"
    exit 0
fi