Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Add some functionality to misc/perf_compare.py #18471

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 10, 2025
107 changes: 84 additions & 23 deletions misc/perf_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
* Create a temp clone of the mypy repo for each target commit to measure
* Checkout a target commit in each of the clones
* Compile mypyc in each of the clones *in parallel*
* Create another temp clone of the mypy repo as the code to check
* Create another temp clone of the first provided revision (or, with -r, a foreign repo) as the code to check
* Self check with each of the compiled mypys N times
* Report the average runtimes and relative performance
* Remove the temp clones
Expand Down Expand Up @@ -44,13 +44,15 @@ def build_mypy(target_dir: str) -> None:
subprocess.run(cmd, env=env, check=True, cwd=target_dir)


def clone(target_dir: str, commit: str | None) -> None:
heading(f"Cloning mypy to {target_dir}")
repo_dir = os.getcwd()
def clone(target_dir: str, commit: str | None, repo_source: str | None = None) -> None:
source_name = repo_source or "mypy"
heading(f"Cloning {source_name} to {target_dir}")
if repo_source is None:
repo_source = os.getcwd()
if os.path.isdir(target_dir):
print(f"{target_dir} exists: deleting")
shutil.rmtree(target_dir)
subprocess.run(["git", "clone", repo_dir, target_dir], check=True)
subprocess.run(["git", "clone", repo_source, target_dir], check=True)
if commit:
subprocess.run(["git", "checkout", commit], check=True, cwd=target_dir)

Expand All @@ -64,7 +66,7 @@ def edit_python_file(fnam: str) -> None:


def run_benchmark(
compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None
compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None, foreign: bool | None
) -> float:
cache_dir = os.path.join(compiled_dir, ".mypy_cache")
if os.path.isdir(cache_dir) and not incremental:
Expand All @@ -76,6 +78,8 @@ def run_benchmark(
cmd = [sys.executable, "-m", "mypy"]
if code:
cmd += ["-c", code]
elif foreign:
pass
else:
cmd += ["--config-file", os.path.join(abschk, "mypy_self_check.ini")]
cmd += glob.glob(os.path.join(abschk, "mypy/*.py"))
Expand All @@ -86,18 +90,33 @@ def run_benchmark(
edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py"))
t0 = time.time()
# Ignore errors, since some commits being measured may generate additional errors.
subprocess.run(cmd, cwd=compiled_dir, env=env)
if foreign:
subprocess.run(cmd, cwd=check_dir, env=env)
else:
subprocess.run(cmd, cwd=compiled_dir, env=env)
return time.time() - t0


def main() -> None:
parser = argparse.ArgumentParser()
whole_program_time_0 = time.time()
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=__doc__,
epilog="Remember: you usually want the first argument to this command to be 'master'.",
)
parser.add_argument(
"--incremental",
default=False,
action="store_true",
help="measure incremental run (fully cached)",
)
parser.add_argument(
"--dont-setup",
default=False,
action="store_true",
help="don't make the clones or compile mypy, just run the performance measurement benchmark "
+ "(this will fail unless the clones already exist, such as from a previous run that was canceled before it deleted them)",
)
parser.add_argument(
"--num-runs",
metavar="N",
Expand All @@ -112,42 +131,65 @@ def main() -> None:
type=int,
help="set maximum number of parallel builds (default=8)",
)
parser.add_argument(
"-r",
metavar="FOREIGN_REPOSITORY",
default=None,
type=str,
help="measure time to typecheck the project at FOREIGN_REPOSITORY instead of mypy self-check; "
+ "the provided value must be the URL or path of a git repo "
+ "(note that this script will take no special steps to *install* the foreign repo, so you will probably get a lot of missing import errors)",
)
parser.add_argument(
"-c",
metavar="CODE",
default=None,
type=str,
help="measure time to type check Python code fragment instead of mypy self-check",
)
parser.add_argument("commit", nargs="+", help="git revision to measure (e.g. branch name)")
parser.add_argument(
"commit",
nargs="+",
help="git revision(s), e.g. branch name or commit id, to measure the performance of",
)
args = parser.parse_args()
incremental: bool = args.incremental
dont_setup: bool = args.dont_setup
commits = args.commit
num_runs: int = args.num_runs + 1
max_workers: int = args.j
code: str | None = args.c
foreign_repo: str | None = args.r

if not (os.path.isdir(".git") and os.path.isdir("mypyc")):
sys.exit("error: Run this the mypy repo root")
sys.exit("error: You must run this script from the mypy repo root")

target_dirs = []
for i, commit in enumerate(commits):
target_dir = f"mypy.{i}.tmpdir"
target_dirs.append(target_dir)
clone(target_dir, commit)
if not dont_setup:
clone(target_dir, commit)

self_check_dir = "mypy.self.tmpdir"
clone(self_check_dir, commits[0])
if foreign_repo:
check_dir = "mypy.foreign.tmpdir"
if not dont_setup:
clone(check_dir, None, foreign_repo)
else:
check_dir = "mypy.self.tmpdir"
if not dont_setup:
clone(check_dir, commits[0])

heading("Compiling mypy")
print("(This will take a while...)")
if not dont_setup:
heading("Compiling mypy")
print("(This will take a while...)")

with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
for future in as_completed(futures):
future.result()
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
for future in as_completed(futures):
future.result()

print(f"Finished compiling mypy ({len(commits)} builds)")
print(f"Finished compiling mypy ({len(commits)} builds)")

heading("Performing measurements")

Expand All @@ -160,7 +202,13 @@ def main() -> None:
items = list(enumerate(commits))
random.shuffle(items)
for i, commit in items:
tt = run_benchmark(target_dirs[i], self_check_dir, incremental=incremental, code=code)
tt = run_benchmark(
target_dirs[i],
check_dir,
incremental=incremental,
code=code,
foreign=bool(foreign_repo),
)
# Don't record the first warm-up run
if n > 0:
print(f"{commit}: t={tt:.3f}s")
Expand All @@ -171,15 +219,28 @@ def main() -> None:
first = -1.0
for commit in commits:
tt = statistics.mean(results[commit])
# pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1
s = statistics.pstdev(results[commit]) if len(results[commit]) > 1 else 0
if first < 0:
delta = "0.0%"
first = tt
else:
d = (tt / first) - 1
delta = f"{d:+.1%}"
print(f"{commit:<25} {tt:.3f}s ({delta})")
print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")

t = int(time.time() - whole_program_time_0)
total_time_taken_formatted = ", ".join(
f"{v} {n if v==1 else n+'s'}"
for v, n in ((t // 3600, "hour"), (t // 60 % 60, "minute"), (t % 60, "second"))
if v
)
print(
"Total time taken by the whole benchmarking program (including any setup):",
total_time_taken_formatted,
)

shutil.rmtree(self_check_dir)
shutil.rmtree(check_dir)
for target_dir in target_dirs:
shutil.rmtree(target_dir)

Expand Down