diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 550f8abb98da8..dfd71873c534c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -312,12 +312,14 @@ jobs: fi - name: Run clang-tidy env: - BASE_SHA: ${{ github.event.pull_request.base.sha }} HEAD_SHA: ${{ github.event.pull_request.head.sha }} + PR_NUMBER: ${{ github.event.pull_request.number }} run: | cd "${GITHUB_WORKSPACE}" set -eux + wget -O pr.diff "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/$PR_NUMBER.diff" + # Run Clang-Tidy # The negative filters below are to exclude files that include onnx_pb.h or # caffe2_pb.h, otherwise we'd have to build protos as part of this CI job. @@ -326,27 +328,28 @@ jobs: # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built. # deploy/interpreter files are excluded due to using macros and other techniquies # that are not easily converted to accepted c++ - python3 tools/clang_tidy.py \ - --verbose \ - --paths torch/csrc/ \ - --diff "$BASE_SHA" \ - -g"-torch/csrc/jit/passes/onnx/helper.cpp" \ - -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp"\ - -g"-torch/csrc/jit/serialization/onnx.cpp" \ - -g"-torch/csrc/jit/serialization/export.cpp" \ - -g"-torch/csrc/jit/serialization/import.cpp" \ - -g"-torch/csrc/jit/serialization/import_legacy.cpp" \ - -g"-torch/csrc/onnx/init.cpp" \ - -g"-torch/csrc/cuda/nccl.*" \ - -g"-torch/csrc/cuda/python_nccl.cpp" \ - -g"-torch/csrc/autograd/FunctionsManual.cpp" \ - -g"-torch/csrc/generic/*.cpp" \ - -g"-torch/csrc/jit/codegen/cuda/runtime/*" \ - -g"-torch/csrc/deploy/interpreter/interpreter.cpp" \ - -g"-torch/csrc/deploy/interpreter/interpreter.h" \ - -g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \ - -g"-torch/csrc/deploy/interpreter/test_main.cpp" \ - "$@" > "${GITHUB_WORKSPACE}"/clang-tidy-output.txt + python3 tools/clang_tidy.py \ + --verbose \ + --paths torch/csrc/ \ + --diff-file pr.diff \ + -g"-torch/csrc/jit/passes/onnx/helper.cpp" \ 
+ -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \ + -g"-torch/csrc/jit/serialization/onnx.cpp" \ + -g"-torch/csrc/jit/serialization/export.cpp" \ + -g"-torch/csrc/jit/serialization/import.cpp" \ + -g"-torch/csrc/jit/serialization/import_legacy.cpp" \ + -g"-torch/csrc/onnx/init.cpp" \ + -g"-torch/csrc/cuda/nccl.*" \ + -g"-torch/csrc/cuda/python_nccl.cpp" \ + -g"-torch/csrc/autograd/FunctionsManual.cpp" \ + -g"-torch/csrc/generic/*.cpp" \ + -g"-torch/csrc/jit/codegen/cuda/runtime/*" \ + -g"-torch/csrc/deploy/interpreter/interpreter.cpp" \ + -g"-torch/csrc/deploy/interpreter/interpreter.h" \ + -g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \ + -g"-torch/csrc/deploy/interpreter/test_main.cpp" \ + "$@" >"${GITHUB_WORKSPACE}"/clang-tidy-output.txt + cat "${GITHUB_WORKSPACE}"/clang-tidy-output.txt diff --git a/tools/clang_tidy.py b/tools/clang_tidy.py index f5c71f41cd3d2..7574c4f3b538e 100755 --- a/tools/clang_tidy.py +++ b/tools/clang_tidy.py @@ -21,7 +21,6 @@ import os import os.path import re -import shlex import shutil import subprocess import sys @@ -32,7 +31,7 @@ except ImportError: from pipes import quote -from typing import Any, Dict, Iterable, List, Set, Union +from typing import Any, Dict, Iterable, List, Set, Tuple Patterns = collections.namedtuple("Patterns", "positive, negative") @@ -42,8 +41,13 @@ # (c/cc/cpp) file. DEFAULT_FILE_PATTERN = re.compile(r".*\.c(c|pp)?") -# @@ -start,count +start,count @@ -CHUNK_PATTERN = r"^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@" +# Search for: +# diff --git ... +# index ... +# --- ... +# +++ ... 
+CHUNK_HEADER_RE = r"diff --git .*?\nindex.*?\n---.*?\n\+\+\+ b/(.*?)\n@@ -(\d+,\d+) \+(\d+,\d+) @@" + CLANG_WARNING_PATTERN = re.compile(r"([^:]+):(\d+):\d+:\s+warning:.*\[([^\]]+)\]") @@ -125,35 +129,25 @@ def filter_files(files: Iterable[str], file_patterns: Patterns) -> Iterable[str] print("{} omitted due to file filters".format(file)) -def get_changed_files(revision: str, paths: List[str]) -> List[str]: - """Runs git diff to get the paths of all changed files.""" - # --diff-filter AMU gets us files that are (A)dded, (M)odified or (U)nmerged (in the working copy). - # --name-only makes git diff return only the file paths, without any of the source changes. - command = "git diff-index --diff-filter=AMU --ignore-all-space --name-only" - output = run_shell_command(shlex.split(command) + [revision] + paths) - return output.split("\n") - - def get_all_files(paths: List[str]) -> List[str]: """Returns all files that are tracked by git in the given paths.""" output = run_shell_command(["git", "ls-files"] + paths) return output.split("\n") -def get_changed_lines(revision: str, filename: str) -> Dict[str, Union[str, List[List[int]]]]: - """Runs git diff to get the line ranges of all file changes.""" - command = shlex.split("git diff-index --unified=0") + [revision, filename] - output = run_shell_command(command) - changed_lines = [] - for chunk in re.finditer(CHUNK_PATTERN, output, re.MULTILINE): - start = int(chunk.group(1)) - count = int(chunk.group(2) or 1) - # If count == 0, a chunk was removed and can be ignored. 
# "@@ -start[,count] +start[,count] @@"; an omitted count means 1.
_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
# Prefix of the header line naming the new version of a file in a git diff.
_NEW_FILE_PREFIX = "+++ b/"


def find_changed_lines(diff: str) -> Dict[str, List[Tuple[int, int]]]:
    """Map each file touched by a git diff to the line ranges of its hunks.

    Args:
        diff: Text of a git-style unified diff (``diff --git`` output).

    Returns:
        A dict keyed by the path that follows ``+++ b/``. Each value is a
        list with one ``(first, last)`` tuple per hunk, giving the inclusive
        range of lines the hunk occupies in the new version of the file
        (context lines of the hunk included). Both numbers are ints.
        Deleted files (``+++ /dev/null``) and hunks that leave no lines in
        the new file are omitted, so a file never maps to an empty list.
    """
    files: Dict[str, List[Tuple[int, int]]] = collections.defaultdict(list)
    target = ""  # path of the file being patched; "" when it is not lintable
    old_left = new_left = 0  # hunk body lines still expected on each side

    # The diff is walked line by line rather than with the whole-file
    # CHUNK_HEADER_RE: that pattern requires the file header directly in
    # front of "@@" and therefore only ever sees the first hunk of a file.
    # Split on "\n" only; str.splitlines() would also break on form feeds
    # and similar characters inside source lines and corrupt the counts.
    for line in diff.split("\n"):
        if old_left > 0 or new_left > 0:
            # Inside a hunk body. Counting the lines (instead of sniffing
            # prefixes) keeps an added line such as "+++ b/x" from being
            # mistaken for a file header.
            if line.startswith("+"):
                new_left -= 1
            elif line.startswith("-"):
                old_left -= 1
            elif not line.startswith("\\"):  # "\ No newline at end of file"
                old_left -= 1
                new_left -= 1
            continue

        if line.startswith("+++ "):
            if line.startswith(_NEW_FILE_PREFIX):
                target = line[len(_NEW_FILE_PREFIX):]
            else:
                target = ""
            continue

        hunk = _HUNK_HEADER_RE.match(line)
        if hunk is None:
            continue

        old_count, start, new_count = hunk.groups()
        old_left = 1 if old_count is None else int(old_count)
        new_left = 1 if new_count is None else int(new_count)
        # A new-side count of 0 is a pure deletion: nothing to lint there.
        if target and new_left > 0:
            first = int(start)
            files[target].append((first, first + new_left - 1))

    return dict(files)
paths = [path.rstrip("/") for path in options.paths] - if options.diff: - files = get_changed_files(options.diff, paths) + if options.diff_file: + with open(options.diff_file, "r") as f: + changed_files = find_changed_lines(f.read()) + line_filters = [ + {"name": name, "lines": lines} for name, lines, in changed_files.items() + ] + files = list(changed_files.keys()) else: + line_filters = [] files = get_all_files(paths) file_patterns = get_file_patterns(options.glob, options.regex) files = list(filter_files(files, file_patterns)) @@ -345,10 +345,6 @@ def main() -> None: print("No files detected.") sys.exit() - line_filters = [] - if options.diff: - line_filters = [get_changed_lines(options.diff, f) for f in files] - clang_tidy_output = run_clang_tidy(options, line_filters, files) if options.suppress_diagnostics: warnings = extract_warnings(clang_tidy_output, base_dir=options.compile_commands_dir)