88from collections .abc import Callable
99from collections .abc import Generator
1010from collections .abc import Sequence
11+ from typing import Any
1112
1213import pre_commit .constants as C
1314from pre_commit import clientlib
@@ -96,7 +97,7 @@ def __init__(self, directory: str | None = None) -> None:
9697 ' PRIMARY KEY (repo, ref)'
9798 ');' ,
9899 )
99- self ._create_config_table (db )
100+ self ._create_configs_table (db )
100101
101102 # Atomic file move
102103 os .replace (tmpfile , self .db_path )
@@ -215,7 +216,7 @@ def make_local(self, deps: Sequence[str]) -> str:
215216 'local' , C .LOCAL_REPO_VERSION , deps , _make_local_repo ,
216217 )
217218
218- def _create_config_table (self , db : sqlite3 .Connection ) -> None :
219+ def _create_configs_table (self , db : sqlite3 .Connection ) -> None :
219220 db .executescript (
220221 'CREATE TABLE IF NOT EXISTS configs ('
221222 ' path TEXT NOT NULL,'
@@ -232,28 +233,83 @@ def mark_config_used(self, path: str) -> None:
232233 return
233234 with self .connect () as db :
234235 # TODO: eventually remove this and only create in _create
235- self ._create_config_table (db )
236+ self ._create_configs_table (db )
236237 db .execute ('INSERT OR IGNORE INTO configs VALUES (?)' , (path ,))
237238
238- def select_all_configs (self ) -> list [str ]:
239- with self .connect () as db :
240- self ._create_config_table (db )
241- rows = db .execute ('SELECT path FROM configs' ).fetchall ()
242- return [path for path , in rows ]
def _mark_used_repos(
        self,
        all_repos: dict[tuple[str, str], str],
        unused_repos: set[tuple[str, str]],
        repo: dict[str, Any],
) -> None:
    """Remove from ``unused_repos`` every store entry ``repo`` refers to.

    ``all_repos`` maps ``(repo, ref)`` to the on-disk path of the clone;
    ``unused_repos`` is mutated in place; ``repo`` is a single entry of
    a loaded config's ``repos`` list.

    - meta repos: nothing is marked.
    - local repos: one entry per hook is marked, keyed by the hook's
      ``additional_dependencies`` and ``C.LOCAL_REPO_VERSION``.
    - any other repo: the plain ``(repo, rev)`` entry is marked, plus one
      entry per configured hook found in the manifest, keyed by that
      hook's effective ``additional_dependencies``.
    """
    name = repo['repo']

    if name == clientlib.META:
        return

    if name == clientlib.LOCAL:
        for hook in repo['hooks']:
            local_deps = hook.get('additional_dependencies')
            local_key = (
                self.db_repo_name(name, local_deps),
                C.LOCAL_REPO_VERSION,
            )
            unused_repos.discard(local_key)
        return

    key = (name, repo['rev'])
    clone_path = all_repos.get(key)
    # without a clone there is no manifest to inspect
    if clone_path is None:
        return

    manifest_path = os.path.join(clone_path, C.MANIFEST_FILE)
    try:
        manifest = clientlib.load_manifest(manifest_path)
    except clientlib.InvalidManifestError:
        # unreadable manifest: leave the entry in `unused_repos`
        return

    unused_repos.discard(key)
    by_id = {manifest_hook['id']: manifest_hook for manifest_hook in manifest}

    for hook in repo['hooks']:
        hook_id = hook['id']
        if hook_id not in by_id:
            continue

        # the manifest value is looked up unconditionally, even when the
        # config hook overrides it
        manifest_deps = by_id[hook_id]['additional_dependencies']
        deps = hook.get('additional_dependencies', manifest_deps)
        dep_key = (self.db_repo_name(name, deps), repo['rev'])
        unused_repos.discard(dep_key)

def gc(self) -> int:
    """Delete unloadable configs and unreferenced repos from the store.

    While holding ``exclusive_lock()`` and a database connection:

    1. every row of ``repos`` starts out as a removal candidate;
    2. each path in ``configs`` is loaded; paths raising
       ``InvalidConfigError`` are deleted from ``configs``, the others
       have the repos they reference taken off the candidate set;
    3. the remaining candidates are deleted from ``repos`` and their
       directories removed with ``rmtree``.

    Returns the number of repos removed.
    """
    with self.exclusive_lock(), self.connect() as db:
        self._create_configs_table(db)

        repo_rows = db.execute(
            'SELECT repo, ref, path FROM repos',
        ).fetchall()
        all_repos = {(name, ref): path for name, ref, path in repo_rows}
        unused_repos = set(all_repos)

        config_rows = db.execute('SELECT path FROM configs').fetchall()

        # parameter tuples for the DELETE below
        dead_rows = []
        for config_path, in config_rows:
            try:
                config = clientlib.load_config(config_path)
            except clientlib.InvalidConfigError:
                dead_rows.append((config_path,))
            else:
                for repo in config['repos']:
                    self._mark_used_repos(all_repos, unused_repos, repo)

        db.executemany('DELETE FROM configs WHERE path = ?', dead_rows)

        db.executemany(
            'DELETE FROM repos WHERE repo = ? and ref = ?',
            sorted(unused_repos),
        )
        for key in unused_repos:
            rmtree(all_repos[key])

        return len(unused_repos)
0 commit comments