|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +This file compares the output and runtime of running normal vs incremental mode |
| 4 | +on the history of any arbitrary git repo as a way of performing a sanity check |
| 5 | +to make sure incremental mode is working correctly and efficiently. |
| 6 | +
|
| 7 | +It does so by first running mypy without incremental mode on the specified range |
| 8 | +of commits to find the expected result, then rewinds back to the first commit and |
| 9 | +re-runs mypy on the commits with incremental mode enabled to make sure it returns |
| 10 | +the exact same result despite the files continuously changing. |
| 11 | +
|
| 12 | +By default, this script downloads and tests mypy's repo. So, doing: |
| 13 | +
|
| 14 | + python3 misc/incremental_checker.py last 30 |
| 15 | +
|
| 16 | +...is equivalent to doing |
| 17 | +
|
| 18 | + python3 misc/incremental_checker.py last 30 \\ |
| 19 | + --repo_url https://github.com/python/mypy.git \\ |
| 20 | + --file-path mypy |
| 21 | +
|
| 22 | +You can choose to run this script against a specific commit id, or against the |
| 23 | +last n commits. |
| 24 | +
|
| 25 | +For example, to run this script against the last 30 commits, do: |
| 26 | +
|
| 27 | + python3 misc/incremental_checker.py last 30 |
| 28 | +
|
| 29 | +To run this script starting from the commit id 2a432b, do: |
| 30 | +
|
| 31 | + python3 misc/incremental_checker.py commit 2a432b |
| 32 | +""" |
| 33 | + |
| 34 | +from typing import Any, Dict, List, Tuple |
| 35 | + |
| 36 | +from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentDefaultsHelpFormatter |
| 37 | +import json |
| 38 | +import os |
| 39 | +import shutil |
| 40 | +import subprocess |
| 41 | +import sys |
| 42 | +import textwrap |
| 43 | +import time |
| 44 | + |
| 45 | + |
# Default location of this script's own cache of expected (non-incremental) results;
# used as the default for load_cache/save_cache and for the --cache-path option.
CACHE_PATH = ".incremental_checker_cache.json"
# Default repository to clone and test (default of the --repo_url option).
MYPY_REPO_URL = "https://github.com/python/mypy.git"
# Default file or directory to typecheck inside the cloned repo (default of --file-path).
MYPY_TARGET_FILE = "mypy"

# Type of the expected-results cache that is loaded from and saved to JSON.
JsonDict = Dict[str, Any]
| 51 | + |
| 52 | + |
def print_offset(text: str, indent_length: int = 4) -> None:
    """Print `text` indented by `indent_length` spaces, with a blank line before and after."""
    padding = ' ' * indent_length
    indented = textwrap.indent(text, padding)
    # The explicit newlines plus print's own trailing newline frame the text
    # with exactly one blank line on each side.
    print('\n' + indented + '\n')
| 57 | + |
| 58 | + |
def delete_folder(folder_path: str) -> None:
    """Recursively delete `folder_path`; do nothing if the path does not exist."""
    if not os.path.exists(folder_path):
        return
    shutil.rmtree(folder_path)
| 62 | + |
| 63 | + |
def execute(command: List[str], fail_on_error: bool = True) -> Tuple[str, str, int]:
    """Run an external command and capture its output.

    The command is handed to the OS as an argument list rather than being
    joined into a shell string, so arguments that contain spaces or shell
    metacharacters (paths, URLs, commit ranges) reach the program unmodified.

    Args:
        command: The program to run followed by its arguments, one list item each.
        fail_on_error: If True, a non-zero exit status is treated as fatal.

    Returns:
        A `(stdout, stderr, returncode)` tuple; both streams are decoded as UTF-8.

    Raises:
        RuntimeError: If `fail_on_error` is True and the command exits with a
            non-zero status. The command, its exit status and the captured
            output are printed before raising.
        OSError: If the program cannot be started at all (e.g. it is not installed).
    """
    proc = subprocess.Popen(
        command,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE)
    stdout_bytes, stderr_bytes = proc.communicate()  # type: Tuple[bytes, bytes]
    stdout, stderr = stdout_bytes.decode('utf-8'), stderr_bytes.decode('utf-8')
    if fail_on_error and proc.returncode != 0:
        print('EXECUTED COMMAND:', repr(command))
        print('RETURN CODE:', proc.returncode)
        print()
        print('STDOUT:')
        print_offset(stdout)
        print('STDERR:')
        print_offset(stderr)
        raise RuntimeError('Unexpected error from external tool.')
    return stdout, stderr, proc.returncode
| 82 | + |
| 83 | + |
def ensure_environment_is_ready(mypy_path: str, temp_repo_path: str, mypy_cache_path: str) -> None:
    """Change the working directory to `mypy_path` and remove leftovers of earlier runs.

    Both `temp_repo_path` and `mypy_cache_path` are deleted if they exist.
    """
    os.chdir(mypy_path)
    for stale_path in (temp_repo_path, mypy_cache_path):
        delete_folder(stale_path)
| 88 | + |
| 89 | + |
def initialize_repo(repo_url: str, temp_repo_path: str, branch: str) -> None:
    """Clone `repo_url` into `temp_repo_path`, then check out `branch` unless it is None."""
    print("Cloning repo {0} to {1}".format(repo_url, temp_repo_path))
    clone_command = ["git", "clone", repo_url, temp_repo_path]
    execute(clone_command)
    if branch is None:
        # No branch requested: stay on whatever the clone checked out.
        return
    print("Checking out branch {}".format(branch))
    checkout_command = ["git", "-C", temp_repo_path, "checkout", branch]
    execute(checkout_command)
| 96 | + |
| 97 | + |
def get_commits(repo_folder_path: str, commit_range: str) -> List[Tuple[str, str]]:
    """Return the commits selected by `commit_range`, oldest first.

    Runs `git log --reverse --oneline <commit_range>` against the repository
    at `repo_folder_path` and parses each output line.

    Args:
        repo_folder_path: Path of the git repository to inspect.
        commit_range: Any revision range or option accepted by `git log`,
            e.g. "abc123^..HEAD" or "-30".

    Returns:
        A list of `(commit_id, message)` tuples, where `commit_id` is the
        abbreviated hash printed by `--oneline`. The list is empty when git
        reports no commits for the range.

    Raises:
        RuntimeError: If the git command exits with a non-zero status.
    """
    raw_data, _stderr, _errcode = execute([
        "git", "-C", repo_folder_path, "log", "--reverse", "--oneline", commit_range])
    output = []  # type: List[Tuple[str, str]]
    for line in raw_data.strip().split('\n'):
        if not line:
            # ''.split('\n') yields [''], so an empty log would otherwise be
            # reported as a single bogus ('', '') commit.
            continue
        commit_id, _, message = line.partition(' ')
        output.append((commit_id, message))
    return output
| 106 | + |
| 107 | + |
def get_commits_starting_at(repo_folder_path: str, start_commit: str) -> List[Tuple[str, str]]:
    """List the commits from `start_commit` up to HEAD as `(commit_id, message)` tuples."""
    print("Fetching commits starting at {0}".format(start_commit))
    # The range begins at the parent ("^") so that `start_commit` itself is included.
    commit_range = '{0}^..HEAD'.format(start_commit)
    return get_commits(repo_folder_path, commit_range)
| 111 | + |
| 112 | + |
def get_nth_commit(repo_folder_path, n: int) -> Tuple[str, str]:
    """Return the first entry of the last `n` commits as a `(commit_id, message)` tuple.

    The commits are obtained from `get_commits` with the range "-n".
    """
    print("Fetching last {} commits (or all, if there are fewer commits than n)".format(n))
    last_n_commits = get_commits(repo_folder_path, '-{}'.format(n))
    return last_n_commits[0]
| 116 | + |
| 117 | + |
def run_mypy(target_file_path: str,
             mypy_cache_path: str,
             incremental: bool = True,
             verbose: bool = False) -> Tuple[float, str]:
    """Run mypy on `target_file_path` and report how long it took and what it printed.

    mypy is always told to use `mypy_cache_path` as its cache directory; the
    "--incremental" flag is added only when `incremental` is True. When
    `verbose` is True, the "-v -v" flags are passed as well so that mypy
    outputs debugging information.

    Returns a `(runtime, output)` tuple: `runtime` is the elapsed time in
    seconds, and `output` is what mypy wrote to stderr if that is non-empty,
    otherwise what it wrote to stdout. A non-zero exit status from mypy is
    not treated as an error.
    """
    incremental_flags = ["--incremental"] if incremental else []
    verbosity_flags = ["-v", "-v"] if verbose else []
    command = (["python3", "-m", "mypy", "--cache-dir", mypy_cache_path]
               + incremental_flags
               + verbosity_flags
               + [target_file_path])
    started_at = time.time()
    stdout, stderr, _ = execute(command, False)
    # Anything on stderr takes precedence over the regular output.
    output = stderr if stderr != "" else stdout
    runtime = time.time() - started_at
    return runtime, output
| 140 | + |
| 141 | + |
def load_cache(incremental_cache_path: str = CACHE_PATH) -> JsonDict:
    """Load the JSON cache stored at `incremental_cache_path`.

    Returns an empty dict when no file exists at that path.
    """
    if not os.path.exists(incremental_cache_path):
        return {}
    with open(incremental_cache_path, 'r') as cache_file:
        return json.load(cache_file)
| 148 | + |
| 149 | + |
def save_cache(cache: JsonDict, incremental_cache_path: str = CACHE_PATH) -> None:
    """Write `cache` to `incremental_cache_path` as JSON indented by two spaces.

    Any existing file at that path is overwritten.
    """
    with open(incremental_cache_path, 'w') as cache_file:
        json.dump(obj=cache, fp=cache_file, indent=2)
| 153 | + |
| 154 | + |
def set_expected(commits: List[Tuple[str, str]],
                 cache: JsonDict,
                 temp_repo_path: str,
                 target_file_path: str,
                 mypy_cache_path: str) -> None:
    """Fill `cache` with the expected (non-incremental) mypy result for each commit.

    For every `(commit_id, message)` in `commits` that is not yet a key of
    `cache`, the commit is checked out inside `temp_repo_path`, mypy is run
    on `target_file_path` with incremental mode disabled, and the result is
    stored as `cache[commit_id] = {'runtime': ..., 'output': ...}`.

    Commits already present in `cache` are skipped. `cache` is modified in
    place; nothing is returned.
    """
    for commit_id, message in commits:
        if commit_id in cache:
            print('Skipping commit (already cached): {0}: "{1}"'.format(commit_id, message))
            continue

        print('Caching expected output for commit {0}: "{1}"'.format(commit_id, message))
        execute(["git", "-C", temp_repo_path, "checkout", commit_id])
        runtime, output = run_mypy(target_file_path, mypy_cache_path, incremental=False)
        cache[commit_id] = {'runtime': runtime, 'output': output}
        if output == "":
            print(" Clean output ({:.3f} sec)".format(runtime))
        else:
            print(" Output ({:.3f} sec)".format(runtime))
            print_offset(output, 8)
    # NOTE(review): assumed to run once after the loop rather than per commit -- confirm.
    print()
| 181 | + |
| 182 | + |
def test_incremental(commits: List[Tuple[str, str]],
                     cache: JsonDict,
                     temp_repo_path: str,
                     target_file_path: str,
                     mypy_cache_path: str) -> None:
    """Run mypy in incremental mode on all `commits` and compare against the expected output.

    Each commit is checked out inside `temp_repo_path` and mypy is run on
    `target_file_path` with incremental mode enabled, using `mypy_cache_path`
    as mypy's cache directory. The first commit is evaluated twice so that
    the incremental cache is warm for the real comparison.

    Args:
        commits: `(commit_id, message)` tuples, in the order to test them.
        cache: Expected results, keyed by commit id; each entry must provide
            'runtime' and 'output'.
        temp_repo_path: Path of the cloned repository.
        target_file_path: File or directory to typecheck.
        mypy_cache_path: Directory mypy uses for its incremental cache.

    Raises:
        KeyError: If a commit has no entry in `cache`.
    """
    if not commits:
        # Without this guard, commits[0] below would raise IndexError.
        print("No commits to test")
        return

    print("Note: first commit is evaluated twice to warm up cache")
    commits = [commits[0]] + commits
    for commit_id, message in commits:
        print('Now testing commit {0}: "{1}"'.format(commit_id, message))
        execute(["git", "-C", temp_repo_path, "checkout", commit_id])
        runtime, output = run_mypy(target_file_path, mypy_cache_path, incremental=True)
        expected_runtime = cache[commit_id]['runtime']  # type: float
        expected_output = cache[commit_id]['output']  # type: str
        if output != expected_output:
            print(" Output does not match expected result!")
            print(" Expected output ({:.3f} sec):".format(expected_runtime))
            print_offset(expected_output, 8)
            print(" Actual output: ({:.3f} sec):".format(runtime))
            print_offset(output, 8)
        else:
            print(" Output matches expected result!")
            print(" Incremental: {:.3f} sec".format(runtime))
            print(" Original: {:.3f} sec".format(expected_runtime))
| 211 | + |
| 212 | + |
def cleanup(temp_repo_path: str, mypy_cache_path: str) -> None:
    """Delete the temporary repo clone and the mypy cache folder, if they exist."""
    for leftover_path in (temp_repo_path, mypy_cache_path):
        delete_folder(leftover_path)
| 216 | + |
| 217 | + |
def test_repo(target_repo_url: str, temp_repo_path: str, target_file_path: str,
              mypy_path: str, incremental_cache_path: str, mypy_cache_path: str,
              range_type: str, range_start: str, branch: str) -> None:
    """Test incremental mode against the repo specified in `target_repo_url`.

    The check runs in five stages:

    1.  Clone `target_repo_url` into the local `temp_repo_path` folder and
        check out `branch` if one was given.
    2.  Work out the list of commits to test. When `range_type` is "last",
        `range_start` is the number of commits to go back; when it is
        "commit", `range_start` is the commit id to start from.
    3.  Run mypy WITHOUT incremental mode on `target_file_path` (assumed to
        live inside `temp_repo_path`) for each commit from stage 2.
        -   Commits whose results are already stored in the cache file at
            `incremental_cache_path` are skipped to save time.
        -   The cache file is rewritten once this stage finishes.
    4.  Go back to the first commit and run mypy WITH incremental mode on
        `target_file_path`, commit by commit, comparing each result with
        the expected one from stage 3.
    5.  Delete the temporary clone and the mypy cache folder.

    Raises RuntimeError if `range_type` is neither "last" nor "commit".
    """
    # Stage 1: Clone repo and get ready to begin testing
    ensure_environment_is_ready(mypy_path, temp_repo_path, mypy_cache_path)
    initialize_repo(target_repo_url, temp_repo_path, branch)

    # Stage 2: Get all commits we want to test
    if range_type not in ("last", "commit"):
        raise RuntimeError("Invalid option: {}".format(range_type))
    if range_type == "commit":
        start_commit = range_start
    else:
        oldest_commit = get_nth_commit(temp_repo_path, int(range_start))
        start_commit = oldest_commit[0]
    commits = get_commits_starting_at(temp_repo_path, start_commit)

    # Stage 3: Find and cache expected results for each commit (without incremental mode)
    expected_results = load_cache(incremental_cache_path)
    set_expected(commits, expected_results, temp_repo_path, target_file_path, mypy_cache_path)
    save_cache(expected_results, incremental_cache_path)

    # Stage 4: Rewind and re-run mypy (with incremental mode enabled)
    test_incremental(commits, expected_results, temp_repo_path, target_file_path,
                     mypy_cache_path)

    # Stage 5: Remove temp files
    cleanup(temp_repo_path, mypy_cache_path)
| 264 | + |
| 265 | + |
def main() -> None:
    """Parse the command line, derive all working paths, and run the check.

    When invoked without any arguments, prints the help text and exits.
    Otherwise the arguments are parsed, every path is made absolute (so the
    current working directory does not matter later on), the chosen paths
    are printed, and `test_repo` is called.
    """
    help_factory = (lambda prog: RawDescriptionHelpFormatter(prog=prog, max_help_position=32))
    parser = ArgumentParser(
        prog='incremental_checker',
        description=__doc__,
        formatter_class=help_factory)

    parser.add_argument("range_type", metavar="START_TYPE", choices=["last", "commit"],
                        help="must be one of 'last' or 'commit'")
    parser.add_argument("range_start", metavar="COMMIT_ID_OR_NUMBER",
                        help="the commit id to start from, or the number of "
                             "commits to move back (see above)")
    parser.add_argument("-r", "--repo_url", default=MYPY_REPO_URL, metavar="URL",
                        help="the repo to clone and run tests on")
    parser.add_argument("-f", "--file-path", default=MYPY_TARGET_FILE, metavar="FILE",
                        help="the name of the file or directory to typecheck")
    parser.add_argument("--cache-path", default=CACHE_PATH, metavar="DIR",
                        help="sets a custom location to store cache data")
    # The two literals are concatenated, so the separator must be explicit;
    # without it the help text read "custom branchuses the default".
    parser.add_argument("--branch", default=None, metavar="NAME",
                        help="check out and test a custom branch; "
                             "uses the default if not specified")

    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    params = parser.parse_args(sys.argv[1:])

    # Make all paths absolute so we avoid having to worry about being in the right folder

    # The path to this specific script (incremental_checker.py).
    script_path = os.path.abspath(sys.argv[0])

    # The path to the mypy repo (two directory levels above this script).
    mypy_path = os.path.abspath(os.path.dirname(os.path.dirname(script_path)))

    # The folder the cloned repo will reside in.
    temp_repo_path = os.path.abspath(os.path.join(mypy_path, "tmp_repo"))

    # The particular file or package to typecheck inside the repo.
    target_file_path = os.path.abspath(os.path.join(temp_repo_path, params.file_path))

    # The path to where the incremental checker cache data is stored.
    incremental_cache_path = os.path.abspath(params.cache_path)

    # The path to store the mypy incremental mode cache data
    mypy_cache_path = os.path.abspath(os.path.join(mypy_path, "misc", ".mypy_cache"))

    print("Assuming mypy is located at {0}".format(mypy_path))
    print("Temp repo will be cloned at {0}".format(temp_repo_path))
    print("Testing file/dir located at {0}".format(target_file_path))
    print("Using cache data located at {0}".format(incremental_cache_path))
    print()

    test_repo(params.repo_url, temp_repo_path, target_file_path,
              mypy_path, incremental_cache_path, mypy_cache_path,
              params.range_type, params.range_start, params.branch)
| 323 | + |
| 324 | + |
# Run the checker only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
0 commit comments