@@ -111,6 +111,7 @@ class CtypesRef(Generic[CtypesCData]):
 
 F = TypeVar("F", bound=Callable[..., Any])
 
+
 def ctypes_function_for_shared_library(lib: ctypes.CDLL):
     def ctypes_function(
         name: str, argtypes: List[Any], restype: Any, enabled: bool = True
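Note: ctypes_function_for_shared_library is the decorator factory the rest of the module uses to bind C symbols. A minimal sketch of the call pattern, assuming the module's shared-library handle is named _lib and using llama_n_ctx purely as an illustrative symbol:

    ctypes_function = ctypes_function_for_shared_library(_lib)

    @ctypes_function("llama_n_ctx", [llama_context_p_ctypes], ctypes.c_uint32)
    def llama_n_ctx(ctx: llama_context_p, /) -> int:
        ...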
@@ -938,18 +939,6 @@ def llama_supports_gpu_offload() -> bool:
     ...
 
 
-# LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
-@ctypes_function("llama_mmap_supported", [], ctypes.c_bool)
-def llama_mmap_supported() -> bool:
-    ...
-
-
-# LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
-@ctypes_function("llama_mlock_supported", [], ctypes.c_bool)
-def llama_mlock_supported() -> bool:
-    ...
-
-
 # LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 @ctypes_function("llama_get_model", [llama_context_p_ctypes], llama_model_p_ctypes)
 def llama_get_model(ctx: llama_context_p, /) -> Optional[llama_model_p]:
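Migration note: per the deprecation messages on the removed bindings, callers switch to the surviving capability probes. A sketch, assuming the module is imported as llama_cpp:

    import llama_cpp

    # replaces the removed llama_mmap_supported() / llama_mlock_supported()
    use_mmap = llama_cpp.llama_supports_mmap()
    use_mlock = llama_cpp.llama_supports_mlock()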
@@ -1158,47 +1147,6 @@ def llama_model_quantize(
     ...
 
 
-# // Apply a LoRA adapter to a loaded model
-# // path_base_model is the path to a higher quality model to use as a base for
-# // the layers modified by the adapter. Can be NULL to use the current loaded model.
-# // The model needs to be reloaded before applying a new adapter, otherwise the adapter
-# // will be applied on top of the previous one
-# // Returns 0 on success
-# LLAMA_API DEPRECATED(int32_t llama_apply_lora_from_file(
-#         struct llama_context * ctx,
-#         const char * path_lora,
-#         float scale,
-#         const char * path_base_model,
-#         int32_t n_threads),
-#         "use llama_model_apply_lora_from_file instead");
-@ctypes_function(
-    "llama_apply_lora_from_file",
-    [
-        llama_context_p_ctypes,
-        ctypes.c_char_p,
-        ctypes.c_float,
-        ctypes.c_char_p,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int32,
-)
-def llama_apply_lora_from_file(
-    ctx: llama_context_p,
-    path_lora: Union[ctypes.c_char_p, bytes],
-    scale: Union[ctypes.c_float, float],
-    path_base_model: Union[ctypes.c_char_p, bytes],
-    n_threads: Union[ctypes.c_int32, int],
-    /,
-) -> int:
-    """Apply a LoRA adapter to a loaded model
-    path_base_model is the path to a higher quality model to use as a base for
-    the layers modified by the adapter. Can be NULL to use the current loaded model.
-    The model needs to be reloaded before applying a new adapter, otherwise the adapter
-    will be applied on top of the previous one
-    Returns 0 on success"""
-    ...
-
-
 # LLAMA_API int32_t llama_model_apply_lora_from_file(
 #     const struct llama_model * model,
 #     const char * path_lora,
@@ -1220,7 +1168,7 @@ def llama_model_apply_lora_from_file(
     model: llama_model_p,
     path_lora: Union[ctypes.c_char_p, bytes],
     scale: Union[ctypes.c_float, float],
-    path_base_model: Union[ctypes.c_char_p, bytes],
+    path_base_model: Union[ctypes.c_char_p, bytes, None],
     n_threads: Union[ctypes.c_int32, int],
     /,
 ) -> int:
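With path_base_model widened to Union[ctypes.c_char_p, bytes, None], passing None (C NULL, meaning "use the currently loaded model as the base") now type-checks. A sketch of a call under that change; the model handle and adapter path are hypothetical:

    res = llama_cpp.llama_model_apply_lora_from_file(
        model,                    # llama_model_p from llama_load_model_from_file
        b"/path/to/adapter.bin",  # path_lora (hypothetical path)
        1.0,                      # scale
        None,                     # path_base_model: NULL, base on the loaded model
        4,                        # n_threads
    )
    if res != 0:
        raise RuntimeError("applying the LoRA adapter failed")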
@@ -1647,72 +1595,6 @@ def llama_save_session_file(
 # //
 
 
-# // Run the llama inference to obtain the logits and probabilities for the next token(s).
-# // tokens + n_tokens is the provided batch of new tokens to process
-# // n_past is the number of tokens to use from previous eval calls
-# // Returns 0 on success
-# // DEPRECATED: use llama_decode() instead
-# LLAMA_API DEPRECATED(int llama_eval(
-#         struct llama_context * ctx,
-#         llama_token * tokens,
-#         int32_t n_tokens,
-#         int32_t n_past),
-#         "use llama_decode() instead");
-@ctypes_function(
-    "llama_eval",
-    [
-        llama_context_p_ctypes,
-        llama_token_p,
-        ctypes.c_int32,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int,
-)
-def llama_eval(
-    ctx: llama_context_p,
-    tokens: CtypesArray[llama_token],
-    n_tokens: Union[ctypes.c_int, int],
-    n_past: Union[ctypes.c_int, int],
-    /,
-) -> int:
-    """Run the llama inference to obtain the logits and probabilities for the next token(s).
-    tokens + n_tokens is the provided batch of new tokens to process
-    n_past is the number of tokens to use from previous eval calls
-    Returns 0 on success
-    DEPRECATED: use llama_decode() instead"""
-    ...
-
-
-# // Same as llama_eval, but use float matrix input directly.
-# // DEPRECATED: use llama_decode() instead
-# LLAMA_API DEPRECATED(int llama_eval_embd(
-#         struct llama_context * ctx,
-#         float * embd,
-#         int32_t n_tokens,
-#         int32_t n_past),
-#         "use llama_decode() instead");
-@ctypes_function(
-    "llama_eval_embd",
-    [
-        llama_context_p_ctypes,
-        ctypes.POINTER(ctypes.c_float),
-        ctypes.c_int32,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int,
-)
-def llama_eval_embd(
-    ctx: llama_context_p,
-    embd: CtypesArray[ctypes.c_float],
-    n_tokens: Union[ctypes.c_int, int],
-    n_past: Union[ctypes.c_int, int],
-    /,
-) -> int:
-    """Same as llama_eval, but use float matrix input directly.
-    DEPRECATED: use llama_decode() instead"""
-    ...
-
-
 # // Return batch for single sequence of tokens starting at pos_0
 # //
 # // NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
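Migration note: llama_eval / llama_eval_embd callers move to the batch API via llama_decode. A minimal sketch, assuming the llama_batch_get_one helper referenced just above keeps its (tokens, n_tokens, pos_0, seq_id) signature at this revision:

    # old: llama_cpp.llama_eval(ctx, tokens, n_tokens, n_past)
    batch = llama_cpp.llama_batch_get_one(tokens, n_tokens, n_past, 0)
    if llama_cpp.llama_decode(ctx, batch) != 0:
        raise RuntimeError("llama_decode failed")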
@@ -2474,28 +2356,6 @@ def llama_sample_temp(
     ...
 
 
-# LLAMA_API DEPRECATED(void llama_sample_temperature(
-#         struct llama_context * ctx,
-#         llama_token_data_array * candidates,
-#         float temp),
-#         "use llama_sample_temp instead");
-@ctypes_function(
-    "llama_sample_temperature",
-    [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float],
-    None,
-)
-def llama_sample_temperature(
-    ctx: llama_context_p,
-    candidates: Union[
-        CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array]
-    ],
-    temp: Union[ctypes.c_float, float],
-    /,
-):
-    """use llama_sample_temp instead"""
-    ...
-
-
 # /// @details Apply constraints from grammar
 # LLAMA_API void llama_sample_grammar(
 #         struct llama_context * ctx,
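Migration note: llama_sample_temperature was a pure alias for llama_sample_temp (same arguments, per the removed binding above), so the fix is a rename:

    # old: llama_cpp.llama_sample_temperature(ctx, ctypes.byref(candidates), 0.8)
    llama_cpp.llama_sample_temp(ctx, ctypes.byref(candidates), 0.8)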