From ac89dea8a4ab854ad4595612843493d20d316380 Mon Sep 17 00:00:00 2001 From: Albert Torosyan Date: Tue, 1 Apr 2025 13:22:38 +0400 Subject: [PATCH 1/5] [feat] Skip progress marker file deletion after indexing --- aim/storage/rockscontainer.pyx | 7 ------- 1 file changed, 7 deletions(-) diff --git a/aim/storage/rockscontainer.pyx b/aim/storage/rockscontainer.pyx index d21e28c32..0f8c3de25 100644 --- a/aim/storage/rockscontainer.pyx +++ b/aim/storage/rockscontainer.pyx @@ -159,16 +159,9 @@ class RocksContainer(Container): Store the collection of `(key, value)` records in the :obj:`Container` `index` for fast reads. """ - if not self._progress_path: - return - for k, v in self.items(): index[k] = v - if self._progress_path.exists(): - self._progress_path.unlink() - self._progress_path = None - def close(self): """Close all the resources.""" if self._resources is None: From 6de4427aea1e86ca334ebd590479a507b883b72e Mon Sep 17 00:00:00 2001 From: Albert Torosyan Date: Tue, 1 Apr 2025 13:29:18 +0400 Subject: [PATCH 2/5] [feat] Detect failed runs and marked them as finished --- aim/sdk/run_status_manager.py | 93 +++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 aim/sdk/run_status_manager.py diff --git a/aim/sdk/run_status_manager.py b/aim/sdk/run_status_manager.py new file mode 100644 index 000000000..d5562628e --- /dev/null +++ b/aim/sdk/run_status_manager.py @@ -0,0 +1,93 @@ +import time +import os +import datetime +import pytz +import threading +from pathlib import Path + +from typing import Iterable + +import aimrocks.errors + +from aim import Repo +from aim.sdk.run_status_watcher import Event + + +class RunStatusManager: + INDEXING_GRACE_PERIOD = 10 + + def __init__(self, repo: Repo, scan_interval: int = 60): + self.repo = repo + self.scan_interval = scan_interval + + self.progress_dir = Path(self.repo.path) / 'meta' / 'progress' + self.progress_dir.mkdir(parents=True, exist_ok=True) + + self.heartbeat_dir = Path(self.repo.path) / 'check_ins' + self.run_heartbeat_cache = {} + + self._stop_event = threading.Event() + self._monitor_thread = None + self._corrupted_runs = set() + + def start(self): + if not self._monitor_thread or not self._monitor_thread.is_alive(): + self._stop_event.clear() + self._monitor_thread = threading.Thread(target=self._run_forever, daemon=True) + self._monitor_thread.start() + + def stop(self): + self._stop_event.set() + if self._monitor_thread: + self._monitor_thread.join() + + def _run_forever(self): + while not self._stop_event.is_set(): + self.check_and_terminate_stalled_runs() + time.sleep(self.scan_interval) + + def _runs_with_progress(self) -> Iterable[str]: + runs_with_progress = filter(lambda x: x not in self._corrupted_runs, os.listdir(self.progress_dir)) + run_hashes = sorted(runs_with_progress, key=lambda r: os.path.getmtime(os.path.join(self.progress_dir, r))) + return run_hashes + + def check_and_terminate_stalled_runs(self): + for run_hash in self._runs_with_progress(): + if self._is_run_stalled(run_hash): + self._mark_run_as_terminated(run_hash) + + def _is_run_stalled(self, run_hash: str) -> bool: + stalled = False + + heartbeat_files = list(sorted(self.heartbeat_dir.glob(f'{run_hash}-*-progress-*-*'), reverse=True)) + if heartbeat_files: + latest_file = heartbeat_files[0].name + last_heartbeat = Event(latest_file) + + last_recorded_heartbeat = self.run_heartbeat_cache.get(run_hash) + if last_recorded_heartbeat is None: + # First time seeing a heartbeat for this run; store and move on + self.run_heartbeat_cache[run_hash] = last_heartbeat + elif last_heartbeat.idx > last_recorded_heartbeat.idx: + # Newer heartbeat arrived, so the run isn't stalled + self.run_heartbeat_cache[run_hash] = last_heartbeat + else: + # No new heartbeat event since last time; check if enough time passed + time_passed = time.time() - last_recorded_heartbeat.detected_epoch_time + if (last_recorded_heartbeat.next_event_in + RunStatusManager.INDEXING_GRACE_PERIOD) < time_passed: + stalled = True + else: + stalled = True + + return stalled + + def _mark_run_as_terminated(self, run_hash: str): + # TODO [AT]: Add run state handling once decided on terms (finished, terminated, aborted, etc.) + try: + meta_run_tree = self.repo.request_tree('meta', run_hash, read_only=False).subtree( + ('meta', 'chunks', run_hash) + ) + if meta_run_tree.get('end_time') is None: + meta_run_tree['end_time'] = datetime.datetime.now(pytz.utc).timestamp() + except (aimrocks.errors.RocksIOError, aimrocks.errors.Corruption): + self._corrupted_runs.add(run_hash) From f9d289fd1e4cff69047478b0b86d7b74b2e4d0e5 Mon Sep 17 00:00:00 2001 From: Albert Torosyan Date: Tue, 1 Apr 2025 13:31:04 +0400 Subject: [PATCH 3/5] [feat] Start run status manager when `aim up` is called --- aim/cli/up/commands.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aim/cli/up/commands.py b/aim/cli/up/commands.py index e294c4f9b..4775e0aa8 100644 --- a/aim/cli/up/commands.py +++ b/aim/cli/up/commands.py @@ -13,6 +13,7 @@ ) from aim.sdk.index_manager import RepoIndexManager from aim.sdk.repo import Repo +from aim.sdk.run_status_manager import RunStatusManager from aim.sdk.utils import clean_repo_path from aim.web.configs import ( AIM_ENV_MODE_KEY, @@ -124,6 +125,8 @@ def up( os.environ[AIM_PROFILER_KEY] = '1' RepoIndexManager.get_index_manager(repo_inst) + run_status_mng = RunStatusManager(repo_inst) + run_status_mng.start() try: server_cmd = build_uvicorn_command( 'aim.web.run:app', From f9c3a4c3a4b8d40159ce2f70f8cb9c791a68420e Mon Sep 17 00:00:00 2001 From: Albert Torosyan Date: Tue, 1 Apr 2025 13:31:54 +0400 Subject: [PATCH 4/5] [fix] Ruff format errors --- aim/sdk/repo.py | 23 ++- aim/sdk/reporter/file_manager.py | 6 +- aim/sdk/run_status_watcher.py | 9 +- aim/storage/arrayview.py | 12 +- aim/storage/artifacts/artifact_storage.py | 9 +- aim/storage/inmemorytreeview.py | 6 +- aim/storage/query.py | 3 +- aim/storage/structured/entities.py | 171 ++++++++++++++-------- aim/storage/treeview.py | 39 +++-- 9 files changed, 186 insertions(+), 92 deletions(-) diff --git a/aim/sdk/repo.py b/aim/sdk/repo.py index b37838421..151a56f86 100644 --- a/aim/sdk/repo.py +++ b/aim/sdk/repo.py @@ -269,7 +269,9 @@ def get_version(cls, path: str): def is_remote_path(cls, path: str): return path.startswith('aim://') - def _get_container(self, name: str, read_only: bool, from_union: bool = False, skip_read_optimization: bool = False) -> Container: + def _get_container( + self, name: str, read_only: bool, from_union: bool = False, skip_read_optimization: bool = False + ) -> Container: # TODO [AT]: refactor get container/tree logic to make it more simple if self.read_only and not read_only: raise ValueError('Repo is read-only') @@ -317,11 +319,17 @@ def request_tree( read_only: bool, from_union: bool = False, # TODO maybe = True by default no_cache: bool = False, - skip_read_optimization: bool = False + skip_read_optimization: bool = False, ): if not self.is_remote_repo: - return self.request(name, sub, read_only=read_only, from_union=from_union, no_cache=no_cache, - skip_read_optimization=skip_read_optimization).tree() + return self.request( + name, + sub, + read_only=read_only, + from_union=from_union, + no_cache=no_cache, + skip_read_optimization=skip_read_optimization, + ).tree() else: return ProxyTree(self._client, name, sub, read_only=read_only, from_union=from_union, no_cache=no_cache) @@ -333,7 +341,7 @@ def request( read_only: bool, from_union: bool = False, # TODO maybe = True by default no_cache: bool = False, - skip_read_optimization: bool = False + skip_read_optimization: bool = False, ): container_config = ContainerConfig(name, sub, read_only) container_view = self.container_view_pool.get(container_config) @@ -344,8 +352,9 @@ def request( else: assert sub is not None path = os.path.join(name, 'chunks', sub) - container = self._get_container(path, read_only=True, from_union=from_union, - skip_read_optimization=skip_read_optimization) + container = self._get_container( + path, read_only=True, from_union=from_union, skip_read_optimization=skip_read_optimization + ) else: assert sub is not None path = os.path.join(name, 'chunks', sub) diff --git a/aim/sdk/reporter/file_manager.py b/aim/sdk/reporter/file_manager.py index 80c2d9a85..72633f084 100644 --- a/aim/sdk/reporter/file_manager.py +++ b/aim/sdk/reporter/file_manager.py @@ -10,10 +10,12 @@ class FileManager(object): @abstractmethod - def poll(self, pattern: str) -> Optional[str]: ... + def poll(self, pattern: str) -> Optional[str]: + ... @abstractmethod - def touch(self, filename: str, cleanup_file_pattern: Optional[str] = None): ... + def touch(self, filename: str, cleanup_file_pattern: Optional[str] = None): + ... class LocalFileManager(FileManager): diff --git a/aim/sdk/run_status_watcher.py b/aim/sdk/run_status_watcher.py index 422cbff12..ccf203bd5 100644 --- a/aim/sdk/run_status_watcher.py +++ b/aim/sdk/run_status_watcher.py @@ -83,13 +83,16 @@ def __init__(self, *, obj_idx: Optional[str] = None, rank: Optional[int] = None, self.message = message @abstractmethod - def is_sent(self): ... + def is_sent(self): + ... @abstractmethod - def update_last_sent(self): ... + def update_last_sent(self): + ... @abstractmethod - def get_msg_details(self): ... + def get_msg_details(self): + ... class StatusNotification(Notification): diff --git a/aim/storage/arrayview.py b/aim/storage/arrayview.py index 4694c1eab..2b9fd8954 100644 --- a/aim/storage/arrayview.py +++ b/aim/storage/arrayview.py @@ -9,7 +9,8 @@ class ArrayView: when index values are not important. """ - def __iter__(self) -> Iterator[Any]: ... + def __iter__(self) -> Iterator[Any]: + ... def keys(self) -> Iterator[int]: """Return sparse indices iterator. @@ -43,13 +44,16 @@ def items(self) -> Iterator[Tuple[int, Any]]: """ ... - def __len__(self) -> int: ... + def __len__(self) -> int: + ... - def __getitem__(self, idx: Union[int, slice]): ... + def __getitem__(self, idx: Union[int, slice]): + ... # TODO implement append - def __setitem__(self, idx: int, val: Any): ... + def __setitem__(self, idx: int, val: Any): + ... def sparse_list(self) -> Tuple[List[int], List[Any]]: """Get sparse indices and values as :obj:`list`s.""" diff --git a/aim/storage/artifacts/artifact_storage.py b/aim/storage/artifacts/artifact_storage.py index efa73cbd1..e0bab8934 100644 --- a/aim/storage/artifacts/artifact_storage.py +++ b/aim/storage/artifacts/artifact_storage.py @@ -7,10 +7,13 @@ def __init__(self, url: str): self.url = url @abstractmethod - def upload_artifact(self, file_path: str, artifact_path: str, block: bool = False): ... + def upload_artifact(self, file_path: str, artifact_path: str, block: bool = False): + ... @abstractmethod - def download_artifact(self, artifact_path: str, dest_dir: Optional[str] = None) -> str: ... + def download_artifact(self, artifact_path: str, dest_dir: Optional[str] = None) -> str: + ... @abstractmethod - def delete_artifact(self, artifact_path: str): ... + def delete_artifact(self, artifact_path: str): + ... diff --git a/aim/storage/inmemorytreeview.py b/aim/storage/inmemorytreeview.py index 7d02c347d..1ce208594 100644 --- a/aim/storage/inmemorytreeview.py +++ b/aim/storage/inmemorytreeview.py @@ -117,6 +117,8 @@ def iterlevel( def array(self, path: Union[AimObjectKey, AimObjectPath] = (), dtype: Any = None) -> TreeArrayView: return TreeArrayView(self.subtree(path), dtype=dtype) - def first_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: ... + def first_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: + ... - def last_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: ... + def last_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: + ... diff --git a/aim/storage/query.py b/aim/storage/query.py index 82de23657..f8fa81fbb 100644 --- a/aim/storage/query.py +++ b/aim/storage/query.py @@ -80,7 +80,8 @@ def __init__(self, expr: str): self.expr = expr @abstractmethod - def check(self, **params) -> bool: ... + def check(self, **params) -> bool: + ... def __call__(self, **params): return self.check(**params) diff --git a/aim/storage/structured/entities.py b/aim/storage/structured/entities.py index 900c422ec..a43471ea7 100644 --- a/aim/storage/structured/entities.py +++ b/aim/storage/structured/entities.py @@ -13,224 +13,281 @@ class StructuredObject(ABC): @classmethod @abstractmethod - def fields(cls): ... + def fields(cls): + ... class Searchable(ABC, Generic[T]): @classmethod @abstractmethod - def find(cls, _id: str, **kwargs) -> Optional[T]: ... + def find(cls, _id: str, **kwargs) -> Optional[T]: + ... @classmethod @abstractmethod - def all(cls, **kwargs) -> Collection[T]: ... + def all(cls, **kwargs) -> Collection[T]: + ... @classmethod @abstractmethod - def search(cls, term: str, **kwargs) -> Collection[T]: ... + def search(cls, term: str, **kwargs) -> Collection[T]: + ... class Run(StructuredObject, Searchable['Run']): @property @abstractmethod - def hash(self) -> str: ... + def hash(self) -> str: + ... @property @abstractmethod - def name(self) -> Optional[str]: ... + def name(self) -> Optional[str]: + ... @name.setter @abstractmethod - def name(self, value: str): ... + def name(self, value: str): + ... @property @abstractmethod - def description(self) -> Optional[str]: ... + def description(self) -> Optional[str]: + ... @description.setter @abstractmethod - def description(self, value: str): ... + def description(self, value: str): + ... @property @abstractmethod - def archived(self) -> bool: ... + def archived(self) -> bool: + ... @archived.setter @abstractmethod - def archived(self, value: bool): ... + def archived(self, value: bool): + ... @property @abstractmethod - def experiment(self) -> Optional['Experiment']: ... + def experiment(self) -> Optional['Experiment']: + ... @experiment.setter @abstractmethod - def experiment(self, value: str): ... + def experiment(self, value: str): + ... @property @abstractmethod - def tags(self) -> TagCollection: ... + def tags(self) -> TagCollection: + ... @abstractmethod - def add_tag(self, value: str) -> 'Tag': ... + def add_tag(self, value: str) -> 'Tag': + ... @abstractmethod - def remove_tag(self, tag_name: str) -> bool: ... + def remove_tag(self, tag_name: str) -> bool: + ... @property @abstractmethod - def info(self) -> 'RunInfo': ... + def info(self) -> 'RunInfo': + ... class Experiment(StructuredObject, Searchable['Experiment']): @property @abstractmethod - def uuid(self) -> str: ... + def uuid(self) -> str: + ... @property @abstractmethod - def name(self) -> str: ... + def name(self) -> str: + ... @name.setter @abstractmethod - def name(self, value: str): ... + def name(self, value: str): + ... @property @abstractmethod - def description(self) -> Optional[str]: ... + def description(self) -> Optional[str]: + ... @description.setter @abstractmethod - def description(self, value: str): ... + def description(self, value: str): + ... @property @abstractmethod - def archived(self) -> bool: ... + def archived(self) -> bool: + ... @archived.setter @abstractmethod - def archived(self, value: bool): ... + def archived(self, value: bool): + ... @property @abstractmethod - def runs(self) -> RunCollection: ... + def runs(self) -> RunCollection: + ... class Tag(StructuredObject, Searchable['Tag']): @property @abstractmethod - def uuid(self) -> str: ... + def uuid(self) -> str: + ... @property @abstractmethod - def name(self) -> str: ... + def name(self) -> str: + ... @name.setter @abstractmethod - def name(self, value: str): ... + def name(self, value: str): + ... @property @abstractmethod - def color(self) -> str: ... + def color(self) -> str: + ... @color.setter @abstractmethod - def color(self, value: str): ... + def color(self, value: str): + ... @property @abstractmethod - def description(self) -> str: ... + def description(self) -> str: + ... @description.setter @abstractmethod - def description(self, value: str): ... + def description(self, value: str): + ... @property @abstractmethod - def archived(self) -> bool: ... + def archived(self) -> bool: + ... @archived.setter @abstractmethod - def archived(self, value: bool): ... + def archived(self, value: bool): + ... @property @abstractmethod - def runs(self) -> RunCollection: ... + def runs(self) -> RunCollection: + ... class Note(StructuredObject, Searchable['Note']): @property @abstractmethod - def id(self) -> int: ... + def id(self) -> int: + ... @property @abstractmethod - def content(self) -> str: ... + def content(self) -> str: + ... @content.setter @abstractmethod - def content(self, value: str): ... + def content(self, value: str): + ... @property @abstractmethod - def run(self) -> int: ... + def run(self) -> int: + ... class RunInfo(StructuredObject, Generic[T]): @property @abstractmethod - def last_notification_index(self) -> int: ... + def last_notification_index(self) -> int: + ... @last_notification_index.setter @abstractmethod - def last_notification_index(self, value: int): ... + def last_notification_index(self, value: int): + ... class ObjectFactory: @abstractmethod - def runs(self) -> RunCollection: ... + def runs(self) -> RunCollection: + ... @abstractmethod - def search_runs(self, term: str) -> RunCollection: ... + def search_runs(self, term: str) -> RunCollection: + ... @abstractmethod - def find_run(self, _id: str) -> Run: ... + def find_run(self, _id: str) -> Run: + ... @abstractmethod - def find_runs(self, ids: List[str]) -> List[Run]: ... + def find_runs(self, ids: List[str]) -> List[Run]: + ... @abstractmethod - def create_run(self, runhash: str) -> Run: ... + def create_run(self, runhash: str) -> Run: + ... @abstractmethod - def delete_run(self, runhash: str) -> bool: ... + def delete_run(self, runhash: str) -> bool: + ... @abstractmethod - def experiments(self) -> ExperimentCollection: ... + def experiments(self) -> ExperimentCollection: + ... @abstractmethod - def search_experiments(self, term: str) -> ExperimentCollection: ... + def search_experiments(self, term: str) -> ExperimentCollection: + ... @abstractmethod - def find_experiment(self, _id: str) -> Experiment: ... + def find_experiment(self, _id: str) -> Experiment: + ... @abstractmethod - def create_experiment(self, name: str) -> Experiment: ... + def create_experiment(self, name: str) -> Experiment: + ... @abstractmethod - def delete_experiment(self, _id: str) -> bool: ... + def delete_experiment(self, _id: str) -> bool: + ... @abstractmethod - def tags(self) -> TagCollection: ... + def tags(self) -> TagCollection: + ... @abstractmethod - def search_tags(self, term: str) -> TagCollection: ... + def search_tags(self, term: str) -> TagCollection: + ... @abstractmethod - def find_tag(self, _id: str) -> Tag: ... + def find_tag(self, _id: str) -> Tag: + ... @abstractmethod - def create_tag(self, name: str) -> Tag: ... + def create_tag(self, name: str) -> Tag: + ... @abstractmethod - def delete_tag(self, name: str) -> bool: ... + def delete_tag(self, name: str) -> bool: + ... diff --git a/aim/storage/treeview.py b/aim/storage/treeview.py index fc05a06f6..f80beff50 100644 --- a/aim/storage/treeview.py +++ b/aim/storage/treeview.py @@ -8,21 +8,26 @@ class TreeView: - def preload(self): ... + def preload(self): + ... - def finalize(self, index: 'TreeView'): ... + def finalize(self, index: 'TreeView'): + ... def subtree(self, path: Union[AimObjectKey, AimObjectPath]) -> 'TreeView': # Default to: return self.view(path, resolve=False) - def view(self, path: Union[AimObjectKey, AimObjectPath], resolve: bool = False): ... + def view(self, path: Union[AimObjectKey, AimObjectPath], resolve: bool = False): + ... - def make_array(self, path: Union[AimObjectKey, AimObjectPath] = ()): ... + def make_array(self, path: Union[AimObjectKey, AimObjectPath] = ()): + ... def collect( self, path: Union[AimObjectKey, AimObjectPath] = (), strict: bool = True, resolve_objects: bool = False - ) -> AimObject: ... + ) -> AimObject: + ... def __getitem__(self, path: Union[AimObjectKey, AimObjectPath]) -> AimObject: return self.collect(path) @@ -33,7 +38,8 @@ def get(self, path: Union[AimObjectKey, AimObjectPath] = (), default: Any = None except KeyError: return default - def __delitem__(self, path: Union[AimObjectKey, AimObjectPath]): ... + def __delitem__(self, path: Union[AimObjectKey, AimObjectPath]): + ... def set(self, path: Union[AimObjectKey, AimObjectPath], value: AimObject, strict: bool = True): self.__setitem__(path, value) @@ -45,18 +51,25 @@ def __setitem__(self, path: Union[AimObjectKey, AimObjectPath], value: AimObject def keys_eager( self, path: Union[AimObjectKey, AimObjectPath] = (), - ): ... + ): + ... def keys( self, path: Union[AimObjectKey, AimObjectPath] = (), level: int = None - ) -> Iterator[Union[AimObjectPath, AimObjectKey]]: ... + ) -> Iterator[Union[AimObjectPath, AimObjectKey]]: + ... - def items_eager(self, path: Union[AimObjectKey, AimObjectPath] = ()): ... + def items_eager(self, path: Union[AimObjectKey, AimObjectPath] = ()): + ... - def items(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> Iterator[Tuple[AimObjectKey, AimObject]]: ... + def items(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> Iterator[Tuple[AimObjectKey, AimObject]]: + ... - def array(self, path: Union[AimObjectKey, AimObjectPath] = (), dtype: Any = None) -> 'ArrayView': ... + def array(self, path: Union[AimObjectKey, AimObjectPath] = (), dtype: Any = None) -> 'ArrayView': + ... - def first_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: ... + def first_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: + ... - def last_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: ... + def last_key(self, path: Union[AimObjectKey, AimObjectPath] = ()) -> AimObjectKey: + ... From b685471e5a8da29c722cf482978acb8747634f4f Mon Sep 17 00:00:00 2001 From: Albert Torosyan Date: Wed, 2 Apr 2025 11:52:05 +0400 Subject: [PATCH 5/5] [fix] Handle progress file removal for normal exits and run terminations --- aim/sdk/run_status_manager.py | 2 ++ aim/storage/rockscontainer.pyx | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/aim/sdk/run_status_manager.py b/aim/sdk/run_status_manager.py index d5562628e..71dc42eeb 100644 --- a/aim/sdk/run_status_manager.py +++ b/aim/sdk/run_status_manager.py @@ -89,5 +89,7 @@ def _mark_run_as_terminated(self, run_hash: str): ) if meta_run_tree.get('end_time') is None: meta_run_tree['end_time'] = datetime.datetime.now(pytz.utc).timestamp() + progress_path = self.progress_dir / run_hash + progress_path.unlink(missing_ok=True) except (aimrocks.errors.RocksIOError, aimrocks.errors.Corruption): self._corrupted_runs.add(run_hash) diff --git a/aim/storage/rockscontainer.pyx b/aim/storage/rockscontainer.pyx index 0f8c3de25..e96fc4b42 100644 --- a/aim/storage/rockscontainer.pyx +++ b/aim/storage/rockscontainer.pyx @@ -35,6 +35,7 @@ class RocksAutoClean(AutoClean): super().__init__(instance) self._lock = None self._db = None + self._progress_path = None def _close(self): """ @@ -48,6 +49,9 @@ class RocksAutoClean(AutoClean): self._db = None self._lock.release() self._lock = None + if self._progress_path is not None: + self._progress_path.unlink(missing_ok=True) + self._progress_path = None if self._db is not None: self._db = None @@ -104,6 +108,7 @@ class RocksContainer(Container): if not self.read_only: progress_dir.mkdir(parents=True, exist_ok=True) self._progress_path.touch(exist_ok=True) + self._resources._progress_path = self._progress_path self.db # TODO check if Containers are reopenable