Commit 2bc1d97 (parent 803924b)

feat: Update llama.cpp

File tree: 3 files changed, +15 -46 lines

llama_cpp/_internals.py (-5)
@@ -805,15 +805,10 @@ def add_penalties(
         ignore_eos: bool,
     ):
         sampler = llama_cpp.llama_sampler_init_penalties(
-            n_vocab,
-            special_eos_id,
-            linefeed_id,
             penalty_last_n,
             penalty_repeat,
             penalty_freq,
             penalty_present,
-            penalize_nl,
-            ignore_eos,
         )
         self._add_sampler(sampler)
 
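Note that after this hunk the add_penalties wrapper still accepts the old arguments but simply stops forwarding the dropped ones to the C function. A minimal sketch of the resulting method (the keyword-only style and full parameter list are assumed from the diff context, not shown in it):

    def add_penalties(
        self,
        *,
        n_vocab: int,          # accepted for compatibility, no longer forwarded
        special_eos_id: int,   # no longer forwarded
        linefeed_id: int,      # no longer forwarded
        penalty_last_n: int,
        penalty_repeat: float,
        penalty_freq: float,
        penalty_present: float,
        penalize_nl: bool,     # no longer forwarded
        ignore_eos: bool,      # no longer forwarded
    ):
        # Only the four penalty parameters reach llama.cpp after this commit.
        sampler = llama_cpp.llama_sampler_init_penalties(
            penalty_last_n,
            penalty_repeat,
            penalty_freq,
            penalty_present,
        )
        self._add_sampler(sampler)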
llama_cpp/llama_cpp.py (+14, -40)
@@ -256,13 +256,17 @@
 # // note: these values should be synchronized with ggml_rope
 # // TODO: maybe move this enum to ggml.h (ggml_rope_type)
 # enum llama_rope_type {
-#     LLAMA_ROPE_TYPE_NONE = -1,
-#     LLAMA_ROPE_TYPE_NORM = 0,
-#     LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
+#     LLAMA_ROPE_TYPE_NONE   = -1,
+#     LLAMA_ROPE_TYPE_NORM   = 0,
+#     LLAMA_ROPE_TYPE_NEOX   = GGML_ROPE_TYPE_NEOX,
+#     LLAMA_ROPE_TYPE_MROPE  = GGML_ROPE_TYPE_MROPE,
+#     LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION,
 # };
 LLAMA_ROPE_TYPE_NONE = -1
 LLAMA_ROPE_TYPE_NORM = 0
 LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX = 2
+LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE = 8
+LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION = 24
 
 
 # enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
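The two new constants mirror the multimodal (M-RoPE) and vision rotary-embedding types added upstream in ggml. A hedged sketch (not part of the commit) of checking which variant a loaded model uses, via the existing llama_rope_type binding; the model path is a placeholder:

    import llama_cpp

    llama_cpp.llama_backend_init()
    params = llama_cpp.llama_model_default_params()
    model = llama_cpp.llama_load_model_from_file(b"model.gguf", params)  # placeholder path

    rope = llama_cpp.llama_rope_type(model)
    if rope == llama_cpp.LLAMA_ROPE_TYPE_MROPE:
        print("model uses multimodal rotary embeddings (M-RoPE)")
    elif rope == llama_cpp.LLAMA_ROPE_TYPE_VISION:
        print("model uses the vision RoPE variant")
    else:
        print("rope type:", rope)

    llama_cpp.llama_free_model(model)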
@@ -1265,6 +1269,7 @@ def llama_rope_freq_scale_train(model: llama_model_p, /) -> float:
 # // Functions to access the model's GGUF metadata scalar values
 # // - The functions return the length of the string on success, or -1 on failure
 # // - The output string is always null-terminated and cleared on failure
+# // - When retrieving a string, an extra byte must be allocated to account for the null terminator
 # // - GGUF array values are not supported by these functions
 
 
@@ -1378,18 +1383,6 @@ def llama_model_n_params(model: llama_model_p, /) -> int:
     ...
 
 
-# // Get a llama model tensor
-# LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
-@ctypes_function(
-    "llama_get_model_tensor", [llama_model_p_ctypes, ctypes.c_char_p], ctypes.c_void_p
-)
-def llama_get_model_tensor(
-    model: llama_model_p, name: Union[ctypes.c_char_p, bytes], /
-) -> ctypes.c_void_p:
-    """Get a llama model tensor"""
-    ...
-
-
 # // Returns true if the model contains an encoder that requires llama_encode() call
 # LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
 @ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
@@ -3336,41 +3329,22 @@ def llama_sampler_init_grammar(
     ...
 
 
+# /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
 # LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
-#            int32_t   n_vocab,         // llama_n_vocab()
-#        llama_token   special_eos_id,  // llama_token_eos()
-#        llama_token   linefeed_id,     // llama_token_nl()
-#            int32_t   penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
-#              float   penalty_repeat,  // 1.0 = disabled
-#              float   penalty_freq,    // 0.0 = disabled
-#              float   penalty_present, // 0.0 = disabled
-#               bool   penalize_nl,     // consider newlines as a repeatable token
-#               bool   ignore_eos);     // ignore the end-of-sequence token
+#            int32_t   penalty_last_n,   // last n tokens to penalize (0 = disable penalty, -1 = context size)
+#              float   penalty_repeat,   // 1.0 = disabled
+#              float   penalty_freq,     // 0.0 = disabled
+#              float   penalty_present); // 0.0 = disabled
 @ctypes_function(
     "llama_sampler_init_penalties",
-    [
-        ctypes.c_int32,
-        llama_token,
-        llama_token,
-        ctypes.c_int32,
-        ctypes.c_float,
-        ctypes.c_float,
-        ctypes.c_float,
-        ctypes.c_bool,
-        ctypes.c_bool,
-    ],
+    [ctypes.c_int32, ctypes.c_float, ctypes.c_float, ctypes.c_float],
     llama_sampler_p_ctypes,
 )
 def llama_sampler_init_penalties(
-    n_vocab: int,
-    special_eos_id: int,
-    linefeed_id: int,
     penalty_last_n: int,
     penalty_repeat: float,
     penalty_freq: float,
     penalty_present: float,
-    penalize_nl: bool,
-    ignore_eos: bool,
     /,
 ) -> llama_sampler_p:
     ...
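After this hunk the penalties sampler takes only the four penalty parameters; the vocabulary, EOS, and newline options were dropped upstream. A hedged usage sketch (not from the commit) wiring the new 4-argument call into a sampler chain with the existing chain bindings; the concrete values are illustrative:

    import llama_cpp

    chain = llama_cpp.llama_sampler_chain_init(
        llama_cpp.llama_sampler_chain_default_params()
    )
    # Per the NOTE above, apply top-k first so the repeated-token search stays cheap.
    llama_cpp.llama_sampler_chain_add(chain, llama_cpp.llama_sampler_init_top_k(40))
    llama_cpp.llama_sampler_chain_add(
        chain,
        llama_cpp.llama_sampler_init_penalties(
            64,   # penalty_last_n: penalize the last 64 tokens (-1 = ctx size, 0 = off)
            1.1,  # penalty_repeat: 1.0 = disabled
            0.0,  # penalty_freq: 0.0 = disabled
            0.0,  # penalty_present: 0.0 = disabled
        ),
    )
    # The chain takes ownership of the added sampler; free the chain when done.
    llama_cpp.llama_sampler_free(chain)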

vendor/llama.cpp (submodule pointer updated)
