@@ -559,9 +559,7 @@ class llama_model_params(ctypes.Structure):
559
559
# enum ggml_type type_k; // data type for K cache
560
560
# enum ggml_type type_v; // data type for V cache
561
561
562
-
563
562
# // Keep the booleans together to avoid misalignment during copy-by-value.
564
- # bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
565
563
# bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
566
564
# bool embedding; // embedding mode only
567
565
# bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
@@ -589,7 +587,6 @@ class llama_context_params(ctypes.Structure):
589
587
cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
590
588
type_k (int): data type for K cache
591
589
type_v (int): data type for V cache
592
- mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
593
590
logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
594
591
embedding (bool): embedding mode only
595
592
offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
@@ -615,7 +612,6 @@ class llama_context_params(ctypes.Structure):
615
612
("cb_eval_user_data" , ctypes .c_void_p ),
616
613
("type_k" , ctypes .c_int ),
617
614
("type_v" , ctypes .c_int ),
618
- ("mul_mat_q" , ctypes .c_bool ),
619
615
("logits_all" , ctypes .c_bool ),
620
616
("embedding" , ctypes .c_bool ),
621
617
("offload_kqv" , ctypes .c_bool ),
@@ -1519,11 +1515,11 @@ def llama_copy_state_data(
1519
1515
...
1520
1516
1521
1517
1522
- # Set the state reading from the specified address
1523
- # Returns the number of bytes read
1518
+ # // Set the state reading from the specified address
1519
+ # // Returns the number of bytes read
1524
1520
# LLAMA_API size_t llama_set_state_data(
1525
1521
# struct llama_context * ctx,
1526
- # uint8_t * src);
1522
+ # const uint8_t * src);
1527
1523
@ctypes_function (
1528
1524
"llama_set_state_data" ,
1529
1525
[llama_context_p_ctypes , ctypes .POINTER (ctypes .c_uint8 )],
0 commit comments