From f32f6f2dc542913337fdab0a419dc5c78764feb0 Mon Sep 17 00:00:00 2001
From: wyattscarpenter <wyattscarpenter@gmail.com>
Date: Tue, 14 Jan 2025 07:31:06 -0800
Subject: [PATCH 1/8] add stdev to perf_compare

---
 misc/perf_compare.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index ef9976b8e2eb..310ff0bd4340 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -171,13 +171,14 @@ def main() -> None:
     first = -1.0
     for commit in commits:
         tt = statistics.mean(results[commit])
+        s = statistics.stdev(results[commit])
         if first < 0:
             delta = "0.0%"
             first = tt
         else:
             d = (tt / first) - 1
             delta = f"{d:+.1%}"
-        print(f"{commit:<25} {tt:.3f}s ({delta})")
+        print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s}")
 
     shutil.rmtree(self_check_dir)
     for target_dir in target_dirs:

From 643afcb7eb43a4d6f3d1e99796c5e40cee01183d Mon Sep 17 00:00:00 2001
From: wyattscarpenter <wyattscarpenter@gmail.com>
Date: Wed, 15 Jan 2025 01:01:40 -0800
Subject: [PATCH 2/8] add -r, --dont-setup, epilog, and total time

---
 misc/perf_compare.py | 77 ++++++++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 310ff0bd4340..952ddd789565 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -44,13 +44,15 @@ def build_mypy(target_dir: str) -> None:
     subprocess.run(cmd, env=env, check=True, cwd=target_dir)
 
 
-def clone(target_dir: str, commit: str | None) -> None:
-    heading(f"Cloning mypy to {target_dir}")
-    repo_dir = os.getcwd()
+def clone(target_dir: str, commit: str | None, repo_source: str | None = None) -> None:
+    source_name = repo_source or "mypy"
+    heading(f"Cloning {source_name} to {target_dir}")
+    if repo_source is None:
+        repo_source = os.getcwd()
     if os.path.isdir(target_dir):
         print(f"{target_dir} exists: deleting")
         shutil.rmtree(target_dir)
-    subprocess.run(["git", "clone", repo_dir, target_dir], check=True)
+    subprocess.run(["git", "clone", repo_source, target_dir], check=True)
     if commit:
         subprocess.run(["git", "checkout", commit], check=True, cwd=target_dir)
 
@@ -64,7 +66,7 @@ def edit_python_file(fnam: str) -> None:
 
 
 def run_benchmark(
-    compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None
+    compiled_dir: str, check_dir: str, *, incremental: bool, code: str, foreign: bool | None
 ) -> float:
     cache_dir = os.path.join(compiled_dir, ".mypy_cache")
     if os.path.isdir(cache_dir) and not incremental:
@@ -76,6 +78,8 @@ def run_benchmark(
     cmd = [sys.executable, "-m", "mypy"]
     if code:
         cmd += ["-c", code]
+    elif foreign:
+        pass
     else:
         cmd += ["--config-file", os.path.join(abschk, "mypy_self_check.ini")]
         cmd += glob.glob(os.path.join(abschk, "mypy/*.py"))
@@ -86,18 +90,28 @@ def run_benchmark(
             edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py"))
     t0 = time.time()
     # Ignore errors, since some commits being measured may generate additional errors.
-    subprocess.run(cmd, cwd=compiled_dir, env=env)
+    if foreign:
+        subprocess.run(cmd, cwd=check_dir, env=env)
+    else:
+        subprocess.run(cmd, cwd=compiled_dir, env=env)
     return time.time() - t0
 
 
 def main() -> None:
-    parser = argparse.ArgumentParser()
+    whole_program_time_0 = time.time()
+    parser = argparse.ArgumentParser(epilog="Remember: you usually want the first argument to this command to be 'master'.")
     parser.add_argument(
         "--incremental",
         default=False,
         action="store_true",
         help="measure incremental run (fully cached)",
     )
+    parser.add_argument(
+        "--dont-setup",
+        default=False,
+        action="store_true",
+        help="don't make the dirs or compile mypy, just run the performance measurement benchmark",
+    )
     parser.add_argument(
         "--num-runs",
         metavar="N",
@@ -112,6 +126,14 @@ def main() -> None:
         type=int,
         help="set maximum number of parallel builds (default=8)",
     )
+    parser.add_argument(
+        "-r",
+        metavar="FOREIGN_REPOSITORY",
+        default=None,
+        type=str,
+        help="measure time to type check the project at FOREIGN_REPOSITORY instead of mypy self-check; " +
+          "provided value must be the URL or path of a git repo",
+    )
     parser.add_argument(
         "-c",
         metavar="CODE",
@@ -122,10 +144,12 @@ def main() -> None:
     parser.add_argument("commit", nargs="+", help="git revision to measure (e.g. branch name)")
     args = parser.parse_args()
     incremental: bool = args.incremental
+    dont_setup: bool = args.dont_setup
     commits = args.commit
     num_runs: int = args.num_runs + 1
     max_workers: int = args.j
     code: str | None = args.c
+    foreign_repo: str | None = args.r
 
     if not (os.path.isdir(".git") and os.path.isdir("mypyc")):
         sys.exit("error: Run this the mypy repo root")
@@ -134,20 +158,28 @@ def main() -> None:
     for i, commit in enumerate(commits):
         target_dir = f"mypy.{i}.tmpdir"
         target_dirs.append(target_dir)
-        clone(target_dir, commit)
+        if not dont_setup:
+            clone(target_dir, commit)
 
-    self_check_dir = "mypy.self.tmpdir"
-    clone(self_check_dir, commits[0])
+    if foreign_repo:
+        check_dir = "mypy.foreign.tmpdir"
+        if not dont_setup:
+            clone(check_dir, None, foreign_repo)
+    else:
+        check_dir = "mypy.self.tmpdir"
+        if not dont_setup:
+            clone(check_dir, commits[0])
 
-    heading("Compiling mypy")
-    print("(This will take a while...)")
+    if not dont_setup:
+        heading("Compiling mypy")
+        print("(This will take a while...)")
 
-    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
-        for future in as_completed(futures):
-            future.result()
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
+            for future in as_completed(futures):
+                future.result()
 
-    print(f"Finished compiling mypy ({len(commits)} builds)")
+        print(f"Finished compiling mypy ({len(commits)} builds)")
 
     heading("Performing measurements")
 
@@ -160,7 +192,7 @@ def main() -> None:
         items = list(enumerate(commits))
         random.shuffle(items)
         for i, commit in items:
-            tt = run_benchmark(target_dirs[i], self_check_dir, incremental=incremental, code=code)
+            tt = run_benchmark(target_dirs[i], check_dir, incremental=incremental, code=code, foreign=bool(foreign_repo))
             # Don't record the first warm-up run
             if n > 0:
                 print(f"{commit}: t={tt:.3f}s")
@@ -171,16 +203,19 @@ def main() -> None:
     first = -1.0
     for commit in commits:
         tt = statistics.mean(results[commit])
-        s = statistics.stdev(results[commit])
+        #pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1
+        s = statistics.pstdev(results[commit]) if len(results[commit]) > 1 else 0
         if first < 0:
             delta = "0.0%"
             first = tt
         else:
             d = (tt / first) - 1
             delta = f"{d:+.1%}"
-        print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s}")
+        print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")
+
+    print(f"Total time taken by the benchmarking program (including any setup): {time.time() - whole_program_time_0:.2f}s")
 
-    shutil.rmtree(self_check_dir)
+    shutil.rmtree(check_dir)
     for target_dir in target_dirs:
         shutil.rmtree(target_dir)
 

From 7848cf6f63781f771715267de57fb00656aa20a8 Mon Sep 17 00:00:00 2001
From: wyattscarpenter <wyattscarpenter@gmail.com>
Date: Wed, 15 Jan 2025 01:24:58 -0800
Subject: [PATCH 3/8] format the elapsed time in a more human-readable way

---
 misc/perf_compare.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 952ddd789565..888855d8040f 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -213,7 +213,11 @@ def main() -> None:
             delta = f"{d:+.1%}"
         print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")
 
-    print(f"Total time taken by the benchmarking program (including any setup): {time.time() - whole_program_time_0:.2f}s")
+    t  = int( time.time() - whole_program_time_0 )
+    total_time_taken_formatted = ", ".join(
+        f"{v} {n if v==1 else n+'s'}" for v, n in ((t//3600, "hour"), (t//60%60, "minute"), (t%60, "second")) if v
+    )
+    print(f"Total time taken by the whole benchmarking program (including any setup):", total_time_taken_formatted)
 
     shutil.rmtree(check_dir)
     for target_dir in target_dirs:

From 773fbb51886b0210b9f71a8a504ad0f81f9697de Mon Sep 17 00:00:00 2001
From: wyattscarpenter <wyattscarpenter@gmail.com>
Date: Wed, 15 Jan 2025 01:27:10 -0800
Subject: [PATCH 4/8] correct three editing errors

---
 misc/perf_compare.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 888855d8040f..b18bc9e08123 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -66,7 +66,7 @@ def edit_python_file(fnam: str) -> None:
 
 
 def run_benchmark(
-    compiled_dir: str, check_dir: str, *, incremental: bool, code: str, foreign: bool | None
+    compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None, foreign: bool | None
 ) -> float:
     cache_dir = os.path.join(compiled_dir, ".mypy_cache")
     if os.path.isdir(cache_dir) and not incremental:
@@ -213,11 +213,11 @@ def main() -> None:
             delta = f"{d:+.1%}"
         print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")
 
-    t  = int( time.time() - whole_program_time_0 )
+    t = int( time.time() - whole_program_time_0 )
     total_time_taken_formatted = ", ".join(
         f"{v} {n if v==1 else n+'s'}" for v, n in ((t//3600, "hour"), (t//60%60, "minute"), (t%60, "second")) if v
     )
-    print(f"Total time taken by the whole benchmarking program (including any setup):", total_time_taken_formatted)
+    print("Total time taken by the whole benchmarking program (including any setup):", total_time_taken_formatted)
 
     shutil.rmtree(check_dir)
     for target_dir in target_dirs:

From e5769a8dd4bb3897715775f1eb3b61645c4bd1fe Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 15 Jan 2025 09:37:14 +0000
Subject: [PATCH 5/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 misc/perf_compare.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index b18bc9e08123..2716ecbfbabf 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -99,7 +99,9 @@ def run_benchmark(
 
 def main() -> None:
     whole_program_time_0 = time.time()
-    parser = argparse.ArgumentParser(epilog="Remember: you usually want the first argument to this command to be 'master'.")
+    parser = argparse.ArgumentParser(
+        epilog="Remember: you usually want the first argument to this command to be 'master'."
+    )
     parser.add_argument(
         "--incremental",
         default=False,
@@ -131,8 +133,8 @@ def main() -> None:
         metavar="FOREIGN_REPOSITORY",
         default=None,
         type=str,
-        help="measure time to type check the project at FOREIGN_REPOSITORY instead of mypy self-check; " +
-          "provided value must be the URL or path of a git repo",
+        help="measure time to type check the project at FOREIGN_REPOSITORY instead of mypy self-check; "
+        + "provided value must be the URL or path of a git repo",
     )
     parser.add_argument(
         "-c",
@@ -192,7 +194,13 @@ def main() -> None:
         items = list(enumerate(commits))
         random.shuffle(items)
         for i, commit in items:
-            tt = run_benchmark(target_dirs[i], check_dir, incremental=incremental, code=code, foreign=bool(foreign_repo))
+            tt = run_benchmark(
+                target_dirs[i],
+                check_dir,
+                incremental=incremental,
+                code=code,
+                foreign=bool(foreign_repo),
+            )
             # Don't record the first warm-up run
             if n > 0:
                 print(f"{commit}: t={tt:.3f}s")
@@ -203,7 +211,7 @@ def main() -> None:
     first = -1.0
     for commit in commits:
         tt = statistics.mean(results[commit])
-        #pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1
+        # pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1
         s = statistics.pstdev(results[commit]) if len(results[commit]) > 1 else 0
         if first < 0:
             delta = "0.0%"
@@ -213,11 +221,16 @@ def main() -> None:
             delta = f"{d:+.1%}"
         print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")
 
-    t = int( time.time() - whole_program_time_0 )
+    t = int(time.time() - whole_program_time_0)
     total_time_taken_formatted = ", ".join(
-        f"{v} {n if v==1 else n+'s'}" for v, n in ((t//3600, "hour"), (t//60%60, "minute"), (t%60, "second")) if v
+        f"{v} {n if v==1 else n+'s'}"
+        for v, n in ((t // 3600, "hour"), (t // 60 % 60, "minute"), (t % 60, "second"))
+        if v
+    )
+    print(
+        "Total time taken by the whole benchmarking program (including any setup):",
+        total_time_taken_formatted,
     )
-    print("Total time taken by the whole benchmarking program (including any setup):", total_time_taken_formatted)
 
     shutil.rmtree(check_dir)
     for target_dir in target_dirs:

From 1b435f2c955d45a3f73faa41f226a49543cf30b7 Mon Sep 17 00:00:00 2001
From: wyattscarpenter <wyattscarpenter@gmail.com>
Date: Thu, 8 May 2025 14:16:07 -0700
Subject: [PATCH 6/8] fix an omitted word in the error message, and elaborate
 the message while at it

---
 misc/perf_compare.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 2716ecbfbabf..6c89afcddd8b 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -154,7 +154,7 @@ def main() -> None:
     foreign_repo: str | None = args.r
 
     if not (os.path.isdir(".git") and os.path.isdir("mypyc")):
-        sys.exit("error: Run this the mypy repo root")
+        sys.exit("error: You must run this script from the mypy repo root")
 
     target_dirs = []
     for i, commit in enumerate(commits):

From d03a1996f125231e74fae2c45e64f8e770fb02d0 Mon Sep 17 00:00:00 2001
From: wyattscarpenter <wyattscarpenter@gmail.com>
Date: Thu, 8 May 2025 20:28:22 -0700
Subject: [PATCH 7/8] improve documentation of perf_compare in cli

---
 misc/perf_compare.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 6c89afcddd8b..658a727973b0 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -9,7 +9,7 @@
  * Create a temp clone of the mypy repo for each target commit to measure
  * Checkout a target commit in each of the clones
  * Compile mypyc in each of the clones *in parallel*
- * Create another temp clone of the mypy repo as the code to check
+ * Create another temp clone of the first provided revision (or, with -r, a foreign repo) as the code to check
  * Self check with each of the compiled mypys N times
  * Report the average runtimes and relative performance
  * Remove the temp clones
@@ -100,6 +100,8 @@ def run_benchmark(
 def main() -> None:
     whole_program_time_0 = time.time()
     parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=__doc__,
         epilog="Remember: you usually want the first argument to this command to be 'master'."
     )
     parser.add_argument(
@@ -112,7 +114,8 @@ def main() -> None:
         "--dont-setup",
         default=False,
         action="store_true",
-        help="don't make the dirs or compile mypy, just run the performance measurement benchmark",
+        help="don't make the clones or compile mypy, just run the performance measurement benchmark "
+          + "(this will fail unless the clones already exist, such as from a previous run that was canceled before it deleted them)",
     )
     parser.add_argument(
         "--num-runs",
@@ -133,8 +136,9 @@ def main() -> None:
         metavar="FOREIGN_REPOSITORY",
         default=None,
         type=str,
-        help="measure time to type check the project at FOREIGN_REPOSITORY instead of mypy self-check; "
-        + "provided value must be the URL or path of a git repo",
+        help="measure time to typecheck the project at FOREIGN_REPOSITORY instead of mypy self-check; "
+        + "the provided value must be the URL or path of a git repo "
+        + "(note that this script will take no special steps to *install* the foreign repo, so you will probably get a lot of missing import errors)",
     )
     parser.add_argument(
         "-c",
@@ -143,7 +147,7 @@ def main() -> None:
         type=str,
         help="measure time to type check Python code fragment instead of mypy self-check",
     )
-    parser.add_argument("commit", nargs="+", help="git revision to measure (e.g. branch name)")
+    parser.add_argument("commit", nargs="+", help="git revision(s), e.g. branch name or commit id, to measure the performance of")
     args = parser.parse_args()
     incremental: bool = args.incremental
     dont_setup: bool = args.dont_setup

From cfb02e9930e7b27602e0ef6d4ead1503a216f7ad Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 9 May 2025 03:30:21 +0000
Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 misc/perf_compare.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 658a727973b0..025d4065561e 100644
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -102,7 +102,7 @@ def main() -> None:
     parser = argparse.ArgumentParser(
         formatter_class=argparse.RawDescriptionHelpFormatter,
         description=__doc__,
-        epilog="Remember: you usually want the first argument to this command to be 'master'."
+        epilog="Remember: you usually want the first argument to this command to be 'master'.",
     )
     parser.add_argument(
         "--incremental",
@@ -115,7 +115,7 @@ def main() -> None:
         default=False,
         action="store_true",
         help="don't make the clones or compile mypy, just run the performance measurement benchmark "
-          + "(this will fail unless the clones already exist, such as from a previous run that was canceled before it deleted them)",
+        + "(this will fail unless the clones already exist, such as from a previous run that was canceled before it deleted them)",
     )
     parser.add_argument(
         "--num-runs",
@@ -147,7 +147,11 @@ def main() -> None:
         type=str,
         help="measure time to type check Python code fragment instead of mypy self-check",
     )
-    parser.add_argument("commit", nargs="+", help="git revision(s), e.g. branch name or commit id, to measure the performance of")
+    parser.add_argument(
+        "commit",
+        nargs="+",
+        help="git revision(s), e.g. branch name or commit id, to measure the performance of",
+    )
     args = parser.parse_args()
     incremental: bool = args.incremental
     dont_setup: bool = args.dont_setup