Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a79b100

Browse files
authored
Fix fine-grained deps in diff-cache.py tool + add test (#20837)
The fine-grained dependency delta was calculated in the wrong direction (`deps1 - deps2` instead of `deps2 - deps1`), so it did not contain the dependencies added in the target cache. Add a basic integration test for diffing caches and applying the diff. It only uses the fixed-format and sqlite caches, as these are expected to be the long-term defaults. The cache diff tests are pretty expensive to run, so we only have one test for now. I used a coding agent to write the test in multiple reviewed increments.
1 parent d3de4a4 commit a79b100

3 files changed

Lines changed: 216 additions & 5 deletions

File tree

misc/diff-cache.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ def main() -> None:
142142
parser = argparse.ArgumentParser()
143143
parser.add_argument("--verbose", action="store_true", default=False, help="Increase verbosity")
144144
parser.add_argument("--sqlite", action="store_true", default=False, help="Use a sqlite cache")
145-
parser.add_argument("input_dir1", help="Input directory for the cache")
146-
parser.add_argument("input_dir2", help="Input directory for the cache")
147-
parser.add_argument("output", help="Output file")
145+
parser.add_argument("input_dir1", help="Input directory for the original cache")
146+
parser.add_argument("input_dir2", help="Input directory for the target cache")
147+
parser.add_argument("output", help="Output file with the diff from original cache")
148148
args = parser.parse_args()
149149

150150
cache1 = make_cache(args.input_dir1, args.sqlite)
@@ -199,7 +199,7 @@ def main() -> None:
199199

200200
# Compute what deps have been added and merge them all into the
201201
# @root deps file.
202-
new_deps = {k: deps1.get(k, set()) - deps2.get(k, set()) for k in deps2}
202+
new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
203203
new_deps = {k: v for k, v in new_deps.items() if v}
204204
try:
205205
root_deps = load(cache1, "@root.deps.json")

mypy/test/test_diff_cache.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
"""Integration tests for misc/diff-cache.py and misc/apply-cache-diff.py."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import os
7+
import shutil
8+
import subprocess
9+
import sys
10+
import tempfile
11+
import time
12+
import unittest
13+
14+
from mypy.test.config import PREFIX
15+
16+
_MISC_DIR = os.path.join(PREFIX, "misc")
17+
_DIFF_CACHE_PATH = os.path.join(_MISC_DIR, "diff-cache.py")
18+
_APPLY_CACHE_DIFF_PATH = os.path.join(_MISC_DIR, "apply-cache-diff.py")
19+
20+
21+
class DiffCacheIntegrationTests(unittest.TestCase):
22+
"""Run mypy twice with different sources, diff the caches, and apply the diff."""
23+
24+
def test_diff_cache_produces_valid_json(self) -> None:
25+
# Use one source directory with two separate cache directories so that
26+
# source paths in the cache metadata are identical between runs. In the
27+
# second run b.py is modified, c.py is added and a.py is only touched.
28+
src_dir = tempfile.mkdtemp()
29+
output_file = os.path.join(tempfile.mkdtemp(), "diff.json")
30+
env = os.environ.copy()
31+
env["PYTHONPATH"] = PREFIX
32+
try:
33+
cache1 = os.path.join(src_dir, "cache1")
34+
cache2 = os.path.join(src_dir, "cache2")
35+
36+
# Write the initial a.py and b.py, then run mypy on b.py into cache1 (sqlite cache)
37+
with open(os.path.join(src_dir, "a.py"), "w") as f:
38+
f.write("x: int = 1\n")
39+
with open(os.path.join(src_dir, "b.py"), "w") as f:
40+
f.write("import a\ndef foo() -> int:\n return 1\n")
41+
result = subprocess.run(
42+
[
43+
sys.executable,
44+
"-m",
45+
"mypy",
46+
"--sqlite-cache",
47+
"--cache-fine-grained",
48+
"--cache-dir",
49+
cache1,
50+
"b.py",
51+
],
52+
cwd=src_dir,
53+
capture_output=True,
54+
text=True,
55+
env=env,
56+
)
57+
assert result.returncode == 0, f"mypy run 1 failed: {result.stderr}"
58+
59+
# Sleep for a second so that file mtimes differ between the two runs
60+
time.sleep(1)
61+
62+
# Touch a.py to bump its mtime without modifying its content
63+
os.utime(os.path.join(src_dir, "a.py"))
64+
65+
# Modify b.py to access a.x (adding a fine-grained dependency),
66+
# and add a new c.py, then run mypy on b.py and c.py into cache2
67+
with open(os.path.join(src_dir, "b.py"), "w") as f:
68+
f.write("import a\ndef foo() -> str:\n return str(a.x)\n")
69+
with open(os.path.join(src_dir, "c.py"), "w") as f:
70+
f.write("import a\ny: str = 'world'\n")
71+
result = subprocess.run(
72+
[
73+
sys.executable,
74+
"-m",
75+
"mypy",
76+
"--sqlite-cache",
77+
"--cache-fine-grained",
78+
"--cache-dir",
79+
cache2,
80+
"b.py",
81+
"c.py",
82+
],
83+
cwd=src_dir,
84+
capture_output=True,
85+
text=True,
86+
env=env,
87+
)
88+
assert result.returncode == 0, f"mypy run 2 failed: {result.stderr}"
89+
90+
# Find the Python version subdirectory of the cache (e.g. "3.14")
91+
subdirs = [
92+
e
93+
for e in os.listdir(cache1)
94+
if os.path.isdir(os.path.join(cache1, e)) and e[0].isdigit()
95+
]
96+
assert len(subdirs) == 1, f"Expected one version subdir, got {subdirs}"
97+
ver = subdirs[0]
98+
99+
# Run diff-cache.py with --sqlite, passing cache1 first and cache2 second
100+
result = subprocess.run(
101+
[
102+
sys.executable,
103+
_DIFF_CACHE_PATH,
104+
"--sqlite",
105+
os.path.join(cache1, ver),
106+
os.path.join(cache2, ver),
107+
output_file,
108+
],
109+
capture_output=True,
110+
text=True,
111+
env=env,
112+
)
113+
assert result.returncode == 0, f"diff-cache.py failed: {result.stderr}"
114+
115+
# Verify the output is a valid, non-empty JSON object
116+
with open(output_file) as f:
117+
data = json.load(f)
118+
assert isinstance(data, dict)
119+
assert len(data) > 0, "Expected non-empty diff"
120+
121+
# Only modified or new files should appear in the diff.
122+
# b.py changed and c.py is new, so both should be present.
123+
# a.py was only touched (content unchanged), so no a.* keys should appear.
124+
keys = set(data.keys())
125+
b_keys = {k for k in keys if "/b." in k or k.startswith("b.")}
126+
c_keys = {k for k in keys if "/c." in k or k.startswith("c.")}
127+
a_keys = {k for k in keys if "/a." in k or k.startswith("a.")}
128+
assert len(a_keys) == 0, f"Unexpected a.* entries in diff: {a_keys}"
129+
assert len(b_keys) == 2, f"Expected 2 b.* entries in diff, got: {b_keys}"
130+
assert len(c_keys) == 3, f"Expected 3 c.* entries in diff, got: {c_keys}"
131+
132+
# The new access to a.x in b.py should create a fine-grained
133+
# dependency recorded in @root.deps.json.
134+
assert "@root.deps.json" in keys
135+
root_deps = json.loads(data["@root.deps.json"])
136+
assert set(root_deps.keys()) == {
137+
"<a.x>",
138+
"<a>",
139+
}, f"Unexpected root deps keys: {sorted(root_deps.keys())}"
140+
assert sorted(root_deps["<a.x>"]) == ["b.foo"]
141+
assert sorted(root_deps["<a>"]) == ["b.foo", "c"]
142+
143+
# Apply the diff to a copy of cache1 and verify the result.
144+
cache2_ver = os.path.join(cache2, ver)
145+
patched = os.path.join(src_dir, "patched")
146+
patched_ver = os.path.join(patched, ver)
147+
shutil.copytree(cache1, patched)
148+
149+
# Snapshot the patched cache's entries before applying the diff
150+
from mypy.metastore import SqliteMetadataStore
151+
152+
def read_all(cache_dir: str) -> dict[str, bytes]:
153+
store = SqliteMetadataStore(cache_dir)
154+
result = {name: store.read(name) for name in store.list_all()}
155+
assert store.db is not None
156+
store.db.close()
157+
return result
158+
159+
before = read_all(patched_ver)
160+
161+
# Apply the diff to the patched copy
162+
result = subprocess.run(
163+
[sys.executable, _APPLY_CACHE_DIFF_PATH, "--sqlite", patched_ver, output_file],
164+
capture_output=True,
165+
text=True,
166+
env=env,
167+
)
168+
assert result.returncode == 0, f"apply-cache-diff.py failed: {result.stderr}"
169+
170+
after = read_all(patched_ver)
171+
172+
# a.py entries should be unchanged by applying the diff
173+
for name in before:
174+
if name.startswith("a.") or "/a." in name:
175+
assert name in after, f"{name} missing after apply"
176+
assert before[name] == after[name], f"{name} changed after apply"
177+
178+
# b.py and c.py entries should match cache2 after applying the diff.
179+
# .meta.ff entries are only checked for having changed, since they contain mtimes that legitimately differ.
180+
target = read_all(cache2_ver)
181+
for prefix in ("b.", "c."):
182+
for name in target:
183+
if not (name.startswith(prefix) or f"/{prefix}" in name):
184+
continue
185+
assert name in after, f"{name} missing after apply"
186+
if name.endswith(".meta.ff"):
187+
# mtimes legitimately differ, but content should not be identical
188+
# to the pre-apply version (it was updated by the diff)
189+
assert after[name] != before.get(name), f"{name} unchanged after apply"
190+
else:
191+
assert after[name] == target[name], f"{name} differs from target"
192+
193+
# Verify fine-grained deps were applied correctly
194+
from mypy.util import json_loads
195+
196+
applied_root_deps = json_loads(after["@root.deps.json"])
197+
assert set(applied_root_deps.keys()) == {
198+
"<a.x>",
199+
"<a>",
200+
}, f"Unexpected applied root deps keys: {sorted(applied_root_deps.keys())}"
201+
assert sorted(applied_root_deps["<a.x>"]) == ["b.foo"]
202+
assert sorted(applied_root_deps["<a>"]) == ["b.foo", "c"]
203+
finally:
204+
shutil.rmtree(src_dir, ignore_errors=True)
205+
shutil.rmtree(os.path.dirname(output_file), ignore_errors=True)
206+
207+
208+
if __name__ == "__main__":
209+
unittest.main()

runtests.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
MYPYC_SEPARATE = "TestRunSeparate"
2121
MYPYC_MULTIMODULE = "multimodule" # Subset of mypyc run tests that are slow
2222
ERROR_STREAM = "ErrorStreamSuite"
23+
DIFF_CACHE = "DiffCacheIntegrationTests"
2324

2425

2526
ALL_NON_FAST = [
@@ -35,6 +36,7 @@
3536
MYPYC_COMMAND_LINE,
3637
MYPYC_SEPARATE,
3738
ERROR_STREAM,
39+
DIFF_CACHE,
3840
]
3941

4042

@@ -90,7 +92,7 @@
9092
"pytest",
9193
"-q",
9294
"-k",
93-
" or ".join([DAEMON, MYPYC_EXTERNAL, MYPYC_COMMAND_LINE, ERROR_STREAM]),
95+
" or ".join([DAEMON, MYPYC_EXTERNAL, MYPYC_COMMAND_LINE, ERROR_STREAM, DIFF_CACHE]),
9496
],
9597
"mypyc-fast": ["pytest", "-q", "mypyc", "-k", f"not ({' or '.join(MYPYC_SLOW)})"],
9698
# Test cases that might take minutes to run

0 commit comments

Comments
 (0)