
Commit 454c9bb

feat: Update llama.cpp
1 parent 2d89964 commit 454c9bb

File tree

2 files changed: +18 -5 lines

llama_cpp/llama_cpp.py (+17 -4)
@@ -300,6 +300,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
 # LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
 # LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
+# LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -315,6 +316,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11
 LLAMA_VOCAB_PRE_TYPE_OLMO = 12
 LLAMA_VOCAB_PRE_TYPE_DBRX = 13
+LLAMA_VOCAB_PRE_TYPE_SMAUG = 14


 # // note: these values should be synchronized with ggml_rope
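
The pre-tokenizer identifiers above are plain module-level integers, so a value can be mapped back to its symbolic name with a simple reverse lookup. A minimal sketch, assuming the LLAMA_VOCAB_PRE_TYPE_* constants are importable from the llama_cpp package as usual:

# Minimal sketch: reverse-map pre-tokenizer enum values to their names.
# Assumption: the LLAMA_VOCAB_PRE_TYPE_* integers are exposed by the llama_cpp package.
import llama_cpp

PRE_TYPE_NAMES = {
    value: name
    for name, value in vars(llama_cpp).items()
    if name.startswith("LLAMA_VOCAB_PRE_TYPE_")
}

print(PRE_TYPE_NAMES[llama_cpp.LLAMA_VOCAB_PRE_TYPE_SMAUG])  # "LLAMA_VOCAB_PRE_TYPE_SMAUG"
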
@@ -718,6 +720,8 @@ class llama_model_params(ctypes.Structure):
 ]


+# // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+# // https://github.com/ggerganov/llama.cpp/pull/7544
 # struct llama_context_params {
 # uint32_t seed; // RNG seed, -1 for random
 # uint32_t n_ctx; // text context, 0 = from model
@@ -744,15 +748,14 @@ class llama_model_params(ctypes.Structure):
 # ggml_backend_sched_eval_callback cb_eval;
 # void * cb_eval_user_data;

-# enum ggml_type type_k; // data type for K cache
-# enum ggml_type type_v; // data type for V cache
+# enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+# enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embeddings; // if true, extract embeddings (together with logits)
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-# bool flash_attn; // whether to use flash attention
-
+# bool flash_attn; // whether to use flash attention [EXPERIMENTAL]

 # // Abort callback
 # // if it returns true, execution of llama_decode() will be aborted
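
The fields marked [EXPERIMENTAL] above (type_k, type_v, flash_attn) are mirrored on the Python llama_context_params struct. A minimal sketch of setting them through the low-level bindings, assuming the usual llama_cpp exports (llama_backend_init, llama_context_default_params, llama_model_default_params, llama_load_model_from_file, llama_new_context_with_model, GGML_TYPE_Q8_0) and a placeholder model path:

# Minimal sketch; per the NOTE above, non-default values for these fields are
# experimental and may crash or give incorrect results in some configurations.
import llama_cpp

llama_cpp.llama_backend_init()

ctx_params = llama_cpp.llama_context_default_params()
ctx_params.n_ctx = 4096
ctx_params.flash_attn = True                  # [EXPERIMENTAL] flash attention
ctx_params.type_k = llama_cpp.GGML_TYPE_Q8_0  # [EXPERIMENTAL] quantized K cache
ctx_params.type_v = llama_cpp.GGML_TYPE_Q8_0  # [EXPERIMENTAL] quantized V cache

model_params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"/path/to/model.gguf", model_params)  # placeholder path
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# ... run inference ...

llama_cpp.llama_free(ctx)
llama_cpp.llama_free_model(model)
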
@@ -2454,6 +2457,16 @@ def llama_token_is_eog(model: llama_model_p, token: Union[llama_token, int], /)
     ...


+# // Identify if Token Id is a control token or a render-able token
+# LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+@ctypes_function(
+    "llama_token_is_control", [llama_model_p_ctypes, llama_token], ctypes.c_bool
+)
+def llama_token_is_control(model: llama_model_p, token: Union[llama_token, int], /) -> bool:
+    """Identify if Token Id is a control token or a render-able token"""
+    ...
+
+
 # // Special tokens

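
As a quick illustration of the new binding, the sketch below lists which token ids a vocabulary marks as control (non-renderable) tokens. It assumes a placeholder GGUF path and the usual low-level exports (llama_backend_init, llama_model_default_params, llama_load_model_from_file, llama_n_vocab):

# Minimal sketch using the new llama_token_is_control binding.
# Assumption: "/path/to/model.gguf" stands in for a real model file.
import llama_cpp

llama_cpp.llama_backend_init()

params = llama_cpp.llama_model_default_params()
params.vocab_only = True  # only the tokenizer/vocab is needed for this check
model = llama_cpp.llama_load_model_from_file(b"/path/to/model.gguf", params)

control_tokens = [
    tok
    for tok in range(llama_cpp.llama_n_vocab(model))
    if llama_cpp.llama_token_is_control(model, tok)
]
print(f"{len(control_tokens)} control (non-renderable) token ids")

llama_cpp.llama_free_model(model)
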

vendor/llama.cpp (submodule update, +1 -1)
