@@ -198,6 +198,7 @@ def __init__(
198
198
A Llama instance.
199
199
"""
200
200
self .verbose = verbose
201
+ self ._stack = contextlib .ExitStack ()
201
202
202
203
set_verbose (verbose )
203
204
@@ -365,8 +366,6 @@ def __init__(
365
366
if not os .path .exists (model_path ):
366
367
raise ValueError (f"Model path does not exist: { model_path } " )
367
368
368
- self ._stack = contextlib .ExitStack ()
369
-
370
369
self ._model = self ._stack .enter_context (
371
370
contextlib .closing (
372
371
_LlamaModel (
@@ -420,6 +419,15 @@ def __init__(
420
419
raise RuntimeError (
421
420
f"Failed to initialize LoRA adapter from lora path: { self .lora_path } "
422
421
)
422
+
423
+ def free_lora_adapter ():
424
+ if self ._lora_adapter is None :
425
+ return
426
+ llama_cpp .llama_lora_adapter_free (self ._lora_adapter )
427
+ self ._lora_adapter = None
428
+
429
+ self ._stack .callback (free_lora_adapter )
430
+
423
431
assert self ._ctx .ctx is not None
424
432
if llama_cpp .llama_lora_adapter_set (
425
433
self ._ctx .ctx , self ._lora_adapter , self .lora_scale
@@ -2085,14 +2093,9 @@ def pooling_type(self) -> str:
2085
2093
2086
2094
def close(self) -> None:
    """Explicitly free the model from memory.

    Closes the instance's ``ExitStack``, which unwinds everything that
    ``__init__`` registered on it: the model (entered through
    ``contextlib.closing``) and, when a LoRA adapter was loaded, the
    callback that frees that adapter.

    Calling this more than once is harmless, because a closed
    ``ExitStack`` has no callbacks left to run. It is also harmless on an
    instance that never got a ``_stack`` attribute.
    """
    # ``__del__`` delegates here, and Python still finalizes an object
    # whose ``__init__`` body never ran (e.g. a TypeError raised while
    # binding the constructor arguments). In that case ``_stack`` does not
    # exist, so look it up defensively instead of raising AttributeError
    # from inside the finalizer.
    stack = getattr(self, "_stack", None)
    if stack is not None:
        stack.close()
2091
2097
2092
2098
def __del__(self) -> None:
    """Finalizer: release the instance's resources by delegating to ``close()``."""
    # All teardown lives in ``close()``; nothing is freed directly here.
    self.close()
2097
2100
2098
2101
@staticmethod
0 commit comments