Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 1f7ece5

Browse files
Michael0x2a authored and gvanrossum committed
Add script to stress-test incremental mode (#1870)
The `incremental_checker.py` script, when run within the `misc` folder, will run incremental mode on a portion of mypy's commit history using the version of mypy in the parent folder. The checker script will check out an independent copy of the mypy repo to test to avoid interfering with the current repo.
1 parent 37424d1 commit 1f7ece5

1 file changed

Lines changed: 326 additions & 0 deletions

File tree

misc/incremental_checker.py

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
#!/usr/bin/env python3
2+
"""
3+
This file compares the output and runtime of running normal vs incremental mode
4+
on the history of any arbitrary git repo as a way of performing a sanity check
5+
to make sure incremental mode is working correctly and efficiently.
6+
7+
It does so by first running mypy without incremental mode on the specified range
8+
of commits to find the expected result, then rewinds back to the first commit and
9+
re-runs mypy on the commits with incremental mode enabled to make sure it returns
10+
the exact same result despite the files continuously changing.
11+
12+
By default, this script will download and test mypy's repo. So, doing:
13+
14+
python3 misc/incremental_checker.py last 30
15+
16+
...is equivalent to doing
17+
18+
python3 misc/incremental_checker.py last 30 \\
19+
--repo_url https://github.com/python/mypy.git \\
20+
--file-path mypy
21+
22+
You can choose to run this script against a specific commit id, or against the
23+
last n commits.
24+
25+
For example, to run this script against the last 30 commits, do:
26+
27+
python3 misc/incremental_checker.py last 30
28+
29+
To run this script starting from the commit id 2a432b, do:
30+
31+
python3 misc/incremental_checker.py commit 2a432b
32+
"""
33+
34+
from typing import Any, Dict, List, Tuple
35+
36+
from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentDefaultsHelpFormatter
37+
import json
38+
import os
39+
import shutil
40+
import subprocess
41+
import sys
42+
import textwrap
43+
import time
44+
45+
46+
# Default location of the JSON file holding the cached non-incremental results
# (used as the default for --cache-path and for load_cache/save_cache).
CACHE_PATH = ".incremental_checker_cache.json"
# Default repo to clone and test (default for --repo_url).
MYPY_REPO_URL = "https://github.com/python/mypy.git"
# Default file or directory to typecheck inside the cloned repo (default for --file-path).
MYPY_TARGET_FILE = "mypy"

# Type of the results cache that is loaded from and saved to JSON.
JsonDict = Dict[str, Any]
51+
52+
53+
def print_offset(text: str, indent_length: int = 4) -> None:
    """Print `text` indented by `indent_length` spaces, framed by blank lines."""
    padding = ' ' * indent_length
    shifted = textwrap.indent(text, padding)
    # The empty strings produce the blank line before and after the text.
    print('', shifted, '', sep='\n')
57+
58+
59+
def delete_folder(folder_path: str) -> None:
    """Recursively remove `folder_path`; do nothing when the path does not exist."""
    if not os.path.exists(folder_path):
        return
    shutil.rmtree(folder_path)
62+
63+
64+
def execute(command: List[str], fail_on_error: bool = True) -> Tuple[str, str, int]:
    """Run `command` and return its `(stdout, stderr, return code)`.

    `command` is handed to the OS as an argument list instead of being joined
    into a shell string, so arguments containing spaces or shell
    metacharacters (paths, commit ranges such as "abc^..HEAD") reach the
    program unchanged.

    Both output streams are captured and decoded as UTF-8; bytes that cannot
    be decoded are replaced rather than raising.

    If `fail_on_error` is True and the return code is non-zero, the command,
    its return code and its captured output are printed and a RuntimeError
    is raised.
    """
    try:
        proc = subprocess.Popen(
            command,
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE)
    except OSError as err:
        # Without a shell, a command that cannot be started raises instead of
        # exiting non-zero. Report it as a failed run (127 is the conventional
        # "command not found" status) so callers keep seeing a result tuple
        # or the usual RuntimeError.
        stdout, stderr, returncode = '', str(err), 127
    else:
        stdout_bytes, stderr_bytes = proc.communicate()  # type: Tuple[bytes, bytes]
        stdout = stdout_bytes.decode('utf-8', errors='replace')
        stderr = stderr_bytes.decode('utf-8', errors='replace')
        returncode = proc.returncode
    if fail_on_error and returncode != 0:
        print('EXECUTED COMMAND:', repr(command))
        print('RETURN CODE:', returncode)
        print()
        print('STDOUT:')
        print_offset(stdout)
        print('STDERR:')
        print_offset(stderr)
        raise RuntimeError('Unexpected error from external tool.')
    return stdout, stderr, returncode
82+
83+
84+
def ensure_environment_is_ready(mypy_path: str, temp_repo_path: str, mypy_cache_path: str) -> None:
    """Change into `mypy_path`, then delete the temp repo and mypy cache folders if present."""
    os.chdir(mypy_path)
    # Deletion happens after the chdir, temp repo first and mypy cache second.
    for stale_path in (temp_repo_path, mypy_cache_path):
        delete_folder(stale_path)
88+
89+
90+
def initialize_repo(repo_url: str, temp_repo_path: str, branch: str) -> None:
    """Clone `repo_url` into `temp_repo_path`, then check out `branch` unless it is None."""
    print("Cloning repo {0} to {1}".format(repo_url, temp_repo_path))
    clone_command = ["git", "clone", repo_url, temp_repo_path]
    execute(clone_command)

    if branch is None:
        # No branch requested: stay on whatever the clone checked out.
        return

    print("Checking out branch {}".format(branch))
    checkout_command = ["git", "-C", temp_repo_path, "checkout", branch]
    execute(checkout_command)
96+
97+
98+
def get_commits(repo_folder_path: str, commit_range: str) -> List[Tuple[str, str]]:
    """Return `(commit id, message)` pairs for `commit_range` in `repo_folder_path`.

    The pairs are parsed from `git log --reverse --oneline <commit_range>` and
    kept in the order git prints them. Each line is split at its first space:
    the part before is the commit id, the rest is the message.

    Returns an empty list when git prints no commits.
    """
    raw_data, _stderr, _errcode = execute([
        "git", "-C", repo_folder_path, "log", "--reverse", "--oneline", commit_range])
    output = []  # type: List[Tuple[str, str]]
    for line in raw_data.strip().split('\n'):
        if not line:
            # Splitting empty output yields one empty string; that is not a commit.
            continue
        commit_id, _, message = line.partition(' ')
        output.append((commit_id, message))
    return output
106+
107+
108+
def get_commits_starting_at(repo_folder_path: str, start_commit: str) -> List[Tuple[str, str]]:
    """Return the commits in the range `<start_commit>^..HEAD` of `repo_folder_path`."""
    print("Fetching commits starting at {0}".format(start_commit))
    # The range begins at the parent of `start_commit`, so `start_commit` itself is listed.
    commit_range = '{0}^..HEAD'.format(start_commit)
    return get_commits(repo_folder_path, commit_range)
111+
112+
113+
def get_nth_commit(repo_folder_path: str, n: int) -> Tuple[str, str]:
    """Return the first `(commit id, message)` pair among the last `n` commits.

    The commits are obtained through `get_commits` with the range `-<n>`, and
    the first entry of that list is returned.

    Raises RuntimeError if `n` is smaller than 1 (which would not form a usable
    `-<n>` range) or if no commit is found.
    """
    if n < 1:
        raise RuntimeError("Invalid number of commits: {} (must be at least 1)".format(n))
    print("Fetching last {} commits (or all, if there are fewer commits than n)".format(n))
    commits = get_commits(repo_folder_path, '-{}'.format(n))
    # Guard against both an empty list and an entry with an empty commit id.
    if not commits or not commits[0][0]:
        raise RuntimeError("No commits found in {}".format(repo_folder_path))
    return commits[0]
116+
117+
118+
def run_mypy(target_file_path: str,
             mypy_cache_path: str,
             incremental: bool = True,
             verbose: bool = False) -> Tuple[float, str]:
    """Run mypy on `target_file_path` and return a `(runtime, output)` pair.

    `runtime` is the elapsed time in seconds measured with `time.time()`.
    `output` is what mypy wrote to stdout, unless it wrote anything to stderr,
    in which case the stderr text is returned instead.

    mypy is always given `--cache-dir mypy_cache_path`. `--incremental` is added
    when `incremental` is True, and "-v -v" is added when `verbose` is True.
    """
    mode_flags = ["--incremental"] if incremental else []
    verbosity_flags = ["-v", "-v"] if verbose else []
    command = (["python3", "-m", "mypy", "--cache-dir", mypy_cache_path]
               + mode_flags
               + verbosity_flags
               + [target_file_path])

    started_at = time.time()
    # A failing mypy run is an expected outcome here, so do not raise on it.
    stdout, stderr, _ = execute(command, False)
    output = stdout if stderr == "" else stderr
    runtime = time.time() - started_at
    return runtime, output
140+
141+
142+
def load_cache(incremental_cache_path: str = CACHE_PATH) -> JsonDict:
    """Return the JSON content of `incremental_cache_path`, or `{}` if the path does not exist."""
    if not os.path.exists(incremental_cache_path):
        return {}
    with open(incremental_cache_path, 'r') as cache_file:
        return json.load(cache_file)
148+
149+
150+
def save_cache(cache: JsonDict, incremental_cache_path: str = CACHE_PATH) -> None:
    """Overwrite `incremental_cache_path` with `cache` serialized as JSON (2-space indent)."""
    with open(incremental_cache_path, 'w') as cache_file:
        serialized = json.dumps(cache, indent=2)
        cache_file.write(serialized)
153+
154+
155+
def set_expected(commits: List[Tuple[str, str]],
                 cache: JsonDict,
                 temp_repo_path: str,
                 target_file_path: str,
                 mypy_cache_path: str) -> None:
    """Fill `cache` with the expected (non-incremental) mypy result for each commit.

    Every commit in `commits` that is not yet a key of `cache` is checked out
    inside `temp_repo_path`, mypy is run without incremental mode on
    `target_file_path`, and the resulting runtime and output are stored in
    `cache` under the commit id.

    Commits already present in `cache` are skipped.
    """
    for commit_id, message in commits:
        if commit_id in cache:
            print('Skipping commit (already cached): {0}: "{1}"'.format(commit_id, message))
            continue

        print('Caching expected output for commit {0}: "{1}"'.format(commit_id, message))
        checkout_command = ["git", "-C", temp_repo_path, "checkout", commit_id]
        execute(checkout_command)
        runtime, output = run_mypy(target_file_path, mypy_cache_path, incremental=False)
        cache[commit_id] = {'runtime': runtime, 'output': output}

        if output != "":
            print(" Output ({:.3f} sec)".format(runtime))
            print_offset(output, 8)
        else:
            print(" Clean output ({:.3f} sec)".format(runtime))
    print()
181+
182+
183+
def test_incremental(commits: List[Tuple[str, str]],
                     cache: JsonDict,
                     temp_repo_path: str,
                     target_file_path: str,
                     mypy_cache_path: str) -> None:
    """Run mypy in incremental mode on every commit and compare with the cached results.

    Each commit is checked out inside `temp_repo_path` and mypy is run with
    incremental mode on `target_file_path`. The output is compared with the
    entry stored for that commit id in `cache`, which must already hold the
    expected 'runtime' and 'output' for all `commits`.
    """
    print("Note: first commit is evaluated twice to warm up cache")
    first_commit = commits[0]
    schedule = [first_commit] + commits
    for commit_id, message in schedule:
        print('Now testing commit {0}: "{1}"'.format(commit_id, message))
        checkout_command = ["git", "-C", temp_repo_path, "checkout", commit_id]
        execute(checkout_command)
        runtime, output = run_mypy(target_file_path, mypy_cache_path, incremental=True)

        expected = cache[commit_id]
        expected_runtime = expected['runtime']  # type: float
        expected_output = expected['output']  # type: str

        if output == expected_output:
            print(" Output matches expected result!")
            print(" Incremental: {:.3f} sec".format(runtime))
            print(" Original: {:.3f} sec".format(expected_runtime))
            continue

        # Mismatch: show both outputs so they can be compared by eye.
        print(" Output does not match expected result!")
        print(" Expected output ({:.3f} sec):".format(expected_runtime))
        print_offset(expected_output, 8)
        print(" Actual output: ({:.3f} sec):".format(runtime))
        print_offset(output, 8)
211+
212+
213+
def cleanup(temp_repo_path: str, mypy_cache_path: str) -> None:
    """Delete the temp repo folder and then the mypy cache folder, if they exist."""
    for leftover in (temp_repo_path, mypy_cache_path):
        delete_folder(leftover)
216+
217+
218+
def test_repo(target_repo_url: str, temp_repo_path: str, target_file_path: str,
              mypy_path: str, incremental_cache_path: str, mypy_cache_path: str,
              range_type: str, range_start: str, branch: str) -> None:
    """Test incremental mode against the repo at `target_repo_url`.

    The work is done in five stages:

    1.  Clone `target_repo_url` into the local `temp_repo_path` folder and
        check out `branch` if one is given.
    2.  Read the repo's history to build the list of commits to test.
        With `range_type` "last", `range_start` is a number of commits to go
        back; with "commit", it is the commit id to start from. Any other
        `range_type` raises RuntimeError.
    3.  Run mypy WITHOUT incremental mode on `target_file_path` (assumed to
        live inside `temp_repo_path`) for each commit from stage 2.
        -   Commits whose result is already stored in the file at
            `incremental_cache_path` are skipped to save time.
        -   The results are written back to that file afterwards.
    4.  Go back to the first commit and run mypy WITH incremental mode on
        `target_file_path`, commit by commit, comparing each result with the
        expected one from stage 3.
    5.  Delete the temp files.
    """
    # Stage 1: Clone repo and get ready to begin testing
    ensure_environment_is_ready(mypy_path, temp_repo_path, mypy_cache_path)
    initialize_repo(target_repo_url, temp_repo_path, branch)

    # Stage 2: Get all commits we want to test
    if range_type not in ("last", "commit"):
        raise RuntimeError("Invalid option: {}".format(range_type))
    if range_type == "commit":
        start_commit = range_start
    else:
        commit_count = int(range_start)
        start_commit = get_nth_commit(temp_repo_path, commit_count)[0]
    commits = get_commits_starting_at(temp_repo_path, start_commit)

    # Stage 3: Find and cache expected results for each commit (without incremental mode)
    cache = load_cache(incremental_cache_path)
    set_expected(commits, cache, temp_repo_path, target_file_path, mypy_cache_path)
    save_cache(cache, incremental_cache_path)

    # Stage 4: Rewind and re-run mypy (with incremental mode enabled)
    test_incremental(commits, cache, temp_repo_path, target_file_path, mypy_cache_path)

    # Stage 5: Remove temp files
    cleanup(temp_repo_path, mypy_cache_path)
264+
265+
266+
def main() -> None:
    """Parse the command line and run the incremental-mode check.

    With no arguments the help text is printed and the program exits.
    Otherwise every path is made absolute, the resolved locations are printed,
    and `test_repo` is called with the parsed options.
    """
    help_factory = (lambda prog: RawDescriptionHelpFormatter(prog=prog, max_help_position=32))
    parser = ArgumentParser(
        prog='incremental_checker',
        description=__doc__,
        formatter_class=help_factory)

    parser.add_argument("range_type", metavar="START_TYPE", choices=["last", "commit"],
                        help="must be one of 'last' or 'commit'")
    parser.add_argument("range_start", metavar="COMMIT_ID_OR_NUMBER",
                        help="the commit id to start from, or the number of "
                        "commits to move back (see above)")
    parser.add_argument("-r", "--repo_url", default=MYPY_REPO_URL, metavar="URL",
                        help="the repo to clone and run tests on")
    parser.add_argument("-f", "--file-path", default=MYPY_TARGET_FILE, metavar="FILE",
                        help="the name of the file or directory to typecheck")
    parser.add_argument("--cache-path", default=CACHE_PATH, metavar="DIR",
                        help="sets a custom location to store cache data")
    # The two literals are concatenated, so the separator must be part of the text.
    parser.add_argument("--branch", default=None, metavar="NAME",
                        help="check out and test a custom branch; "
                        "uses the default if not specified")

    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    params = parser.parse_args(sys.argv[1:])

    # Make all paths absolute so we avoid having to worry about being in the right folder

    # The path to this specific script (incremental_checker.py).
    script_path = os.path.abspath(sys.argv[0])

    # The path to the mypy repo: the parent of the folder containing this script.
    mypy_path = os.path.abspath(os.path.dirname(os.path.dirname(script_path)))

    # The folder the cloned repo will reside in.
    temp_repo_path = os.path.abspath(os.path.join(mypy_path, "tmp_repo"))

    # The particular file or package to typecheck inside the repo.
    target_file_path = os.path.abspath(os.path.join(temp_repo_path, params.file_path))

    # The path to where the incremental checker cache data is stored.
    incremental_cache_path = os.path.abspath(params.cache_path)

    # The path to store the mypy incremental mode cache data
    mypy_cache_path = os.path.abspath(os.path.join(mypy_path, "misc", ".mypy_cache"))

    print("Assuming mypy is located at {0}".format(mypy_path))
    print("Temp repo will be cloned at {0}".format(temp_repo_path))
    print("Testing file/dir located at {0}".format(target_file_path))
    print("Using cache data located at {0}".format(incremental_cache_path))
    print()

    test_repo(params.repo_url, temp_repo_path, target_file_path,
              mypy_path, incremental_cache_path, mypy_cache_path,
              params.range_type, params.range_start, params.branch)
323+
324+
325+
# Run the checker only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)