Commit f062a7f

feat: Update llama.cpp

Parent: cf1fdd8

3 files changed: +4 / -12 lines

llama_cpp/llama.py

Lines changed: 0 additions & 4 deletions
@@ -86,7 +86,6 @@ def __init__(
         yarn_beta_fast: float = 32.0,
         yarn_beta_slow: float = 1.0,
         yarn_orig_ctx: int = 0,
-        mul_mat_q: bool = True,
         logits_all: bool = False,
         embedding: bool = False,
         offload_kqv: bool = True,
@@ -291,7 +290,6 @@ def __init__(
             yarn_beta_slow if yarn_beta_slow != 0.0 else 0
         )
         self.context_params.yarn_orig_ctx = yarn_orig_ctx if yarn_orig_ctx != 0 else 0
-        self.context_params.mul_mat_q = mul_mat_q
         self.context_params.logits_all = (
             logits_all if draft_model is None else True
         )  # Must be set to True for speculative decoding
@@ -1724,7 +1722,6 @@ def __getstate__(self):
            yarn_beta_fast=self.context_params.yarn_beta_fast,
            yarn_beta_slow=self.context_params.yarn_beta_slow,
            yarn_orig_ctx=self.context_params.yarn_orig_ctx,
-           mul_mat_q=self.context_params.mul_mat_q,
            logits_all=self.context_params.logits_all,
            embedding=self.context_params.embedding,
            # Sampling Params
@@ -1768,7 +1765,6 @@ def __setstate__(self, state):
            yarn_beta_fast=state["yarn_beta_fast"],
            yarn_beta_slow=state["yarn_beta_slow"],
            yarn_orig_ctx=state["yarn_orig_ctx"],
-           mul_mat_q=state["mul_mat_q"],
            logits_all=state["logits_all"],
            embedding=state["embedding"],
            # Sampling Params
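
For callers of the high-level API, the net effect of this file's hunks is that mul_mat_q is no longer an accepted keyword of Llama.__init__ and no longer round-trips through __getstate__/__setstate__ (the removed upstream comment marked it DEPRECATED, always true). A minimal before/after sketch from the caller's side; the model path is a placeholder, not part of the commit:

from llama_cpp import Llama

# Before this commit the deprecated flag was still accepted:
#   llm = Llama(model_path="./model.gguf", mul_mat_q=True)
# After it, passing mul_mat_q raises TypeError; simply drop the argument:
llm = Llama(model_path="./model.gguf")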

llama_cpp/llama_cpp.py

Lines changed: 3 additions & 7 deletions
@@ -559,9 +559,7 @@ class llama_model_params(ctypes.Structure):
 # enum ggml_type type_k; // data type for K cache
 # enum ggml_type type_v; // data type for V cache
 
-
 # // Keep the booleans together to avoid misalignment during copy-by-value.
-# bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
 # bool logits_all;  // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embedding;   // embedding mode only
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
@@ -589,7 +587,6 @@ class llama_context_params(ctypes.Structure):
         cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
         type_k (int): data type for K cache
         type_v (int): data type for V cache
-        mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
         logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         embedding (bool): embedding mode only
         offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
@@ -615,7 +612,6 @@ class llama_context_params(ctypes.Structure):
         ("cb_eval_user_data", ctypes.c_void_p),
         ("type_k", ctypes.c_int),
         ("type_v", ctypes.c_int),
-        ("mul_mat_q", ctypes.c_bool),
         ("logits_all", ctypes.c_bool),
         ("embedding", ctypes.c_bool),
         ("offload_kqv", ctypes.c_bool),
@@ -1519,11 +1515,11 @@ def llama_copy_state_data(
     ...
 
 
-# Set the state reading from the specified address
-# Returns the number of bytes read
+# // Set the state reading from the specified address
+# // Returns the number of bytes read
 # LLAMA_API size_t llama_set_state_data(
 #         struct llama_context * ctx,
-#                  uint8_t * src);
+#            const uint8_t * src);
 @ctypes_function(
     "llama_set_state_data",
     [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)],
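
This last hunk only resyncs the mirrored C header comment (the // prefixes and the upstream signature's new const uint8_t * src); the binding's ctypes argument types are unchanged. A hedged usage sketch of the copy/restore pair, assuming a live ctx handle obtained elsewhere and the companion llama_get_state_size API for buffer sizing:

import ctypes
import llama_cpp

def snapshot(ctx) -> bytes:
    # Size the buffer, then copy the context state out of ctx.
    size = llama_cpp.llama_get_state_size(ctx)
    buf = (ctypes.c_uint8 * size)()
    written = llama_cpp.llama_copy_state_data(ctx, buf)
    return bytes(buf[:written])

def restore(ctx, blob: bytes) -> None:
    # Feed a previously captured snapshot back into ctx.
    buf = (ctypes.c_uint8 * len(blob)).from_buffer_copy(blob)
    llama_cpp.llama_set_state_data(ctx, buf)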

vendor/llama.cpp

Submodule pointer updated (the new revision is not shown in this view).
