@@ -111,6 +111,7 @@ class CtypesRef(Generic[CtypesCData]):
 
 F = TypeVar("F", bound=Callable[..., Any])
 
+
 def ctypes_function_for_shared_library(lib: ctypes.CDLL):
     def ctypes_function(
         name: str, argtypes: List[Any], restype: Any, enabled: bool = True
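For readers unfamiliar with the binding style above: `ctypes_function_for_shared_library` is a decorator factory that wires each annotated Python stub to its C symbol. A minimal sketch of the pattern, reconstructed from the names in this hunk (the file's actual implementation may differ in details such as error handling):

```python
import ctypes
from typing import Any, Callable, List, TypeVar

F = TypeVar("F", bound=Callable[..., Any])

def ctypes_function_for_shared_library(lib: ctypes.CDLL):
    """Return a decorator that binds annotated stubs to symbols in `lib`."""

    def ctypes_function(name: str, argtypes: List[Any], restype: Any, enabled: bool = True):
        def decorator(f: F) -> F:
            if not enabled:
                return f  # keep the stub; the symbol is absent from this build
            func = getattr(lib, name)  # look up the C symbol
            func.argtypes = argtypes   # argument marshalling
            func.restype = restype     # return-type marshalling
            return func  # type: ignore[return-value]

        return decorator

    return ctypes_function
```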
@@ -938,18 +939,6 @@ def llama_supports_gpu_offload() -> bool:
     ...
 
 
-# LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
-@ctypes_function("llama_mmap_supported", [], ctypes.c_bool)
-def llama_mmap_supported() -> bool:
-    ...
-
-
-# LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
-@ctypes_function("llama_mlock_supported", [], ctypes.c_bool)
-def llama_mlock_supported() -> bool:
-    ...
-
-
 # LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 @ctypes_function("llama_get_model", [llama_context_p_ctypes], llama_model_p_ctypes)
 def llama_get_model(ctx: llama_context_p, /) -> Optional[llama_model_p]:
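Call sites that still used the two wrappers removed here migrate one-for-one to the `llama_supports_*` bindings named in the deprecation messages (a sketch; assumes the installed `llama_cpp` module exposes them, as the hunk header's surrounding code indicates):

```python
import llama_cpp

# Before (removed by this commit):
#   llama_cpp.llama_mmap_supported()
#   llama_cpp.llama_mlock_supported()

# After:
print("mmap supported:", llama_cpp.llama_supports_mmap())
print("mlock supported:", llama_cpp.llama_supports_mlock())
```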
@@ -1158,47 +1147,6 @@ def llama_model_quantize(
     ...
 
 
-# // Apply a LoRA adapter to a loaded model
-# // path_base_model is the path to a higher quality model to use as a base for
-# // the layers modified by the adapter. Can be NULL to use the current loaded model.
-# // The model needs to be reloaded before applying a new adapter, otherwise the adapter
-# // will be applied on top of the previous one
-# // Returns 0 on success
-# LLAMA_API DEPRECATED(int32_t llama_apply_lora_from_file(
-#         struct llama_context * ctx,
-#         const char * path_lora,
-#         float scale,
-#         const char * path_base_model,
-#         int32_t n_threads),
-#         "use llama_model_apply_lora_from_file instead");
-@ctypes_function(
-    "llama_apply_lora_from_file",
-    [
-        llama_context_p_ctypes,
-        ctypes.c_char_p,
-        ctypes.c_float,
-        ctypes.c_char_p,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int32,
-)
-def llama_apply_lora_from_file(
-    ctx: llama_context_p,
-    path_lora: Union[ctypes.c_char_p, bytes],
-    scale: Union[ctypes.c_float, float],
-    path_base_model: Union[ctypes.c_char_p, bytes],
-    n_threads: Union[ctypes.c_int32, int],
-    /,
-) -> int:
-    """Apply a LoRA adapter to a loaded model
-    path_base_model is the path to a higher quality model to use as a base for
-    the layers modified by the adapter. Can be NULL to use the current loaded model.
-    The model needs to be reloaded before applying a new adapter, otherwise the adapter
-    will be applied on top of the previous one
-    Returns 0 on success"""
-    ...
-
-
 # LLAMA_API int32_t llama_model_apply_lora_from_file(
 #         const struct llama_model * model,
 #         const char * path_lora,
@@ -1220,7 +1168,7 @@ def llama_model_apply_lora_from_file(
     model: llama_model_p,
     path_lora: Union[ctypes.c_char_p, bytes],
     scale: Union[ctypes.c_float, float],
-    path_base_model: Union[ctypes.c_char_p, bytes],
+    path_base_model: Union[ctypes.c_char_p, bytes, None],
     n_threads: Union[ctypes.c_int32, int],
     /,
 ) -> int:
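Migration for the removed context-level LoRA wrapper: the model-level binding kept above takes a `llama_model_p` instead of a context, and with this hunk `path_base_model` may now be passed as `None` (matching the header's "Can be NULL" note). A hedged sketch; the file paths and thread count are placeholders:

```python
import llama_cpp

params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"/path/to/model.gguf", params)

# Before (removed): llama_cpp.llama_apply_lora_from_file(ctx, path_lora, scale, base, n_threads)
ret = llama_cpp.llama_model_apply_lora_from_file(
    model,
    b"/path/to/adapter.bin",  # path_lora
    1.0,                      # scale
    None,                     # path_base_model: use the loaded model as base
    4,                        # n_threads
)
if ret != 0:
    raise RuntimeError("applying the LoRA adapter failed")
```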
@@ -1647,72 +1595,6 @@ def llama_save_session_file(
 # //
 
 
-# // Run the llama inference to obtain the logits and probabilities for the next token(s).
-# // tokens + n_tokens is the provided batch of new tokens to process
-# // n_past is the number of tokens to use from previous eval calls
-# // Returns 0 on success
-# // DEPRECATED: use llama_decode() instead
-# LLAMA_API DEPRECATED(int llama_eval(
-#         struct llama_context * ctx,
-#         llama_token * tokens,
-#         int32_t n_tokens,
-#         int32_t n_past),
-#         "use llama_decode() instead");
-@ctypes_function(
-    "llama_eval",
-    [
-        llama_context_p_ctypes,
-        llama_token_p,
-        ctypes.c_int32,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int,
-)
-def llama_eval(
-    ctx: llama_context_p,
-    tokens: CtypesArray[llama_token],
-    n_tokens: Union[ctypes.c_int, int],
-    n_past: Union[ctypes.c_int, int],
-    /,
-) -> int:
-    """Run the llama inference to obtain the logits and probabilities for the next token(s).
-    tokens + n_tokens is the provided batch of new tokens to process
-    n_past is the number of tokens to use from previous eval calls
-    Returns 0 on success
-    DEPRECATED: use llama_decode() instead"""
-    ...
-
-
-# // Same as llama_eval, but use float matrix input directly.
-# // DEPRECATED: use llama_decode() instead
-# LLAMA_API DEPRECATED(int llama_eval_embd(
-#         struct llama_context * ctx,
-#         float * embd,
-#         int32_t n_tokens,
-#         int32_t n_past),
-#         "use llama_decode() instead");
-@ctypes_function(
-    "llama_eval_embd",
-    [
-        llama_context_p_ctypes,
-        ctypes.POINTER(ctypes.c_float),
-        ctypes.c_int32,
-        ctypes.c_int32,
-    ],
-    ctypes.c_int,
-)
-def llama_eval_embd(
-    ctx: llama_context_p,
-    embd: CtypesArray[ctypes.c_float],
-    n_tokens: Union[ctypes.c_int, int],
-    n_past: Union[ctypes.c_int, int],
-    /,
-) -> int:
-    """Same as llama_eval, but use float matrix input directly.
-    DEPRECATED: use llama_decode() instead"""
-    ...
-
-
 # // Return batch for single sequence of tokens starting at pos_0
 # //
 # // NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
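Callers of the removed `llama_eval()` move to `llama_decode()`; the `llama_batch_get_one()` helper described in the surrounding context lines covers the common single-sequence case. A sketch, assuming `ctx` comes from the usual `llama_new_context_with_model()` setup:

```python
import llama_cpp

def decode_tokens(ctx, token_ids, n_past: int) -> None:
    """Replacement for the removed llama_eval(ctx, tokens, n_tokens, n_past)."""
    tokens = (llama_cpp.llama_token * len(token_ids))(*token_ids)
    # One batch for sequence 0, starting at position n_past.
    batch = llama_cpp.llama_batch_get_one(tokens, len(token_ids), n_past, 0)
    if llama_cpp.llama_decode(ctx, batch) != 0:
        raise RuntimeError("llama_decode failed")
```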
@@ -2474,28 +2356,6 @@ def llama_sample_temp(
     ...
 
 
-# LLAMA_API DEPRECATED(void llama_sample_temperature(
-#         struct llama_context * ctx,
-#         llama_token_data_array * candidates,
-#         float temp),
-#         "use llama_sample_temp instead");
-@ctypes_function(
-    "llama_sample_temperature",
-    [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float],
-    None,
-)
-def llama_sample_temperature(
-    ctx: llama_context_p,
-    candidates: Union[
-        CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array]
-    ],
-    temp: Union[ctypes.c_float, float],
-    /,
-):
-    """use llama_sample_temp instead"""
-    ...
-
-
 # /// @details Apply constraints from grammar
 # LLAMA_API void llama_sample_grammar(
 #         struct llama_context * ctx,
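This sampler removal is a pure rename: `llama_sample_temp()` keeps the same signature, so call sites change only the function name. A sketch, with `ctx` and `candidates` assumed to come from existing sampling code:

```python
import ctypes
import llama_cpp

def apply_temperature(ctx, candidates, temp: float = 0.8) -> None:
    """candidates: a llama_cpp.llama_token_data_array populated by the caller."""
    # Before (removed): llama_cpp.llama_sample_temperature(ctx, ctypes.byref(candidates), temp)
    llama_cpp.llama_sample_temp(ctx, ctypes.byref(candidates), temp)
```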