[pull] main from abetlen:main #62


Merged: 3 commits merged into main from abetlen:main on Mar 12, 2025.
Changes from all commits
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.3.8]

- feat: Update llama.cpp to ggerganov/llama.cpp@7841fc723e059d1fd9640e5c0ef19050fcc7c698

## [0.3.7]

- feat: Update llama.cpp to ggerganov/llama.cpp@794fe23f29fb40104975c91fe19f23798f7c726e
2 changes: 1 addition & 1 deletion llama_cpp/__init__.py
@@ -1,4 +1,4 @@
from .llama_cpp import *
from .llama import *

__version__ = "0.3.7"
__version__ = "0.3.8"
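
For reference, an illustrative check that an installed build picks up the version bump:

import llama_cpp
print(llama_cpp.__version__)  # "0.3.8" after this release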
74 changes: 71 additions & 3 deletions llama_cpp/llama_cpp.py
@@ -227,6 +227,7 @@
# LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
# LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
# LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
# LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
# };
LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -257,6 +258,7 @@
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26
LLAMA_VOCAB_PRE_TYPE_MINERVA = 27
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28
LLAMA_VOCAB_PRE_TYPE_GPT4O = 29


# // note: these values should be synchronized with ggml_rope
@@ -1357,6 +1359,12 @@ def llama_model_n_head(model: llama_model_p, /) -> int:
    ...


# LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
@ctypes_function("llama_model_n_head_kv", [llama_model_p_ctypes], ctypes.c_int32)
def llama_model_n_head_kv(model: llama_model_p, /) -> int:
    ...
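

# --- Editor's illustration (not part of this PR): querying the new
# llama_model_n_head_kv binding on a loaded model. The model path is a
# placeholder; llama_backend_init, llama_model_load_from_file and
# llama_model_free are assumed to be available from the existing low-level
# bindings.
import llama_cpp

llama_cpp.llama_backend_init()
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_model_load_from_file(b"/path/to/model.gguf", params)
if model:
    n_head = llama_cpp.llama_model_n_head(model)
    n_head_kv = llama_cpp.llama_model_n_head_kv(model)  # added in this diff
    print(f"n_head={n_head}, n_head_kv={n_head_kv}")    # differ for GQA models
    llama_cpp.llama_model_free(model)
llama_cpp.llama_backend_free()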


# // Get the model's RoPE frequency scaling factor
# LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
@ctypes_function("llama_model_rope_freq_scale_train", [llama_model_p_ctypes], ctypes.c_float)
@@ -3375,8 +3383,8 @@ class llama_sampler_i(ctypes.Structure):


# struct llama_sampler {
-#     struct llama_sampler_i * iface;
-#     llama_sampler_context_t ctx;
+#     const struct llama_sampler_i * iface;
+#     llama_sampler_context_t ctx;
# };
class llama_sampler(ctypes.Structure):
    _fields_ = [
@@ -3410,6 +3418,18 @@ class llama_sampler(ctypes.Structure):


# // mirror of llama_sampler_i:
# LLAMA_API struct llama_sampler * llama_sampler_init (const struct llama_sampler_i * iface, llama_sampler_context_t ctx);
@ctypes_function(
    "llama_sampler_init",
    [ctypes.POINTER(llama_sampler_i), llama_sampler_context_t],
    llama_sampler_p_ctypes,
)
def llama_sampler_init(
    iface: ctypes.POINTER(llama_sampler_i), ctx: llama_sampler_context_t, /
) -> llama_sampler_p:
    ...
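

# --- Editor's illustration (not part of this PR): llama_sampler_init pairs a
# custom llama_sampler_i vtable with a context pointer. Most Python callers
# use the built-in constructors instead; the lines below, which assume the
# existing llama_sampler_init_greedy and llama_sampler_free bindings, show
# llama_sampler_name on one of them.
import llama_cpp

smpl = llama_cpp.llama_sampler_init_greedy()
print(llama_cpp.llama_sampler_name(smpl))  # expected to print b'greedy'
llama_cpp.llama_sampler_free(smpl)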


# LLAMA_API const char * llama_sampler_name (const struct llama_sampler * smpl);
@ctypes_function(
"llama_sampler_name",
@@ -3627,6 +3647,17 @@ def llama_sampler_init_xtc(
    ...


# /// @details Top n sigma sampling as described in academic paper "Top-nσ: Not All Logits Are You Need" https://arxiv.org/pdf/2411.07641
# LLAMA_API struct llama_sampler * llama_sampler_init_top_n_sigma(float n);
@ctypes_function(
    "llama_sampler_init_top_n_sigma",
    [ctypes.c_float],
    llama_sampler_p_ctypes,
)
def llama_sampler_init_top_n_sigma(n: float, /) -> llama_sampler_p:
    ...
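

# --- Editor's illustration (not part of this PR): using the new top-n-sigma
# sampler in a sampler chain, with a final distribution sampler doing the
# actual pick. The chain helpers and llama_sampler_init_dist are assumed from
# the existing bindings; the threshold 1.0 and seed 42 are illustrative only.
import llama_cpp

chain = llama_cpp.llama_sampler_chain_init(
    llama_cpp.llama_sampler_chain_default_params()
)
llama_cpp.llama_sampler_chain_add(chain, llama_cpp.llama_sampler_init_top_n_sigma(1.0))
llama_cpp.llama_sampler_chain_add(chain, llama_cpp.llama_sampler_init_dist(42))
# token = llama_cpp.llama_sampler_sample(chain, ctx, -1)  # given a llama_context ctx
llama_cpp.llama_sampler_free(chain)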


# /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
# /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
# /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@@ -3685,6 +3716,43 @@ def llama_sampler_init_grammar(
    ...


# /// @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
# /// @param trigger_patterns A list of patterns that will trigger the grammar sampler. Pattern will be matched from the start of the generation output, and grammar sampler will be fed content starting from its first match group.
# /// @param trigger_tokens A list of tokens that will trigger the grammar sampler. Grammar sampler will be fed content starting from the trigger token included.
# LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy_patterns(
# const struct llama_vocab * vocab,
# const char * grammar_str,
# const char * grammar_root,
# const char ** trigger_patterns,
# size_t num_trigger_patterns,
# const llama_token * trigger_tokens,
# size_t num_trigger_tokens);
@ctypes_function(
    "llama_sampler_init_grammar_lazy_patterns",
    [
        llama_vocab_p_ctypes,
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.POINTER(ctypes.c_char_p),
        ctypes.c_size_t,
        ctypes.POINTER(llama_token),
        ctypes.c_size_t,
    ],
    llama_sampler_p_ctypes,
)
def llama_sampler_init_grammar_lazy_patterns(
    vocab: llama_vocab_p,
    grammar_str: bytes,
    grammar_root: bytes,
    trigger_patterns: CtypesArray[bytes],
    num_trigger_patterns: int,
    trigger_tokens: CtypesArray[llama_token],
    num_trigger_tokens: int,
    /,
) -> llama_sampler_p:
    ...
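

# --- Editor's illustration (not part of this PR): one way the arguments for
# the new lazy-grammar binding might be marshalled with ctypes. The helper
# name, the "<tool_call>" trigger pattern, and the GBNF text are hypothetical;
# `vocab` is assumed to come from llama_model_get_vocab(model).
import ctypes
import llama_cpp

def make_lazy_grammar_sampler(vocab):
    trigger_patterns = (ctypes.c_char_p * 1)(b"<tool_call>")
    grammar = b'root ::= "yes" | "no"'  # placeholder grammar
    return llama_cpp.llama_sampler_init_grammar_lazy_patterns(
        vocab,
        grammar,
        b"root",
        trigger_patterns, 1,  # num_trigger_patterns
        None, 0,              # no trigger tokens
    )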


# /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
# LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
# int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size)
@@ -3737,7 +3805,7 @@ def llama_sampler_init_dry(
    dry_base: float,
    dry_allowed_length: int,
    dry_penalty_last_n: int,
-    seq_breakers: CtypesArray[bytes],
+    seq_breakers,
    num_breakers: int,
    /,
) -> llama_sampler_p:
2 changes: 1 addition & 1 deletion vendor/llama.cpp (submodule pointer updated to ggerganov/llama.cpp@7841fc723e059d1fd9640e5c0ef19050fcc7c698)