|
256 | 256 | # // note: these values should be synchronized with ggml_rope
|
257 | 257 | # // TODO: maybe move this enum to ggml.h (ggml_rope_type)
|
258 | 258 | # enum llama_rope_type {
|
259 |
| -# LLAMA_ROPE_TYPE_NONE = -1, |
260 |
| -# LLAMA_ROPE_TYPE_NORM = 0, |
261 |
| -# LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, |
| 259 | +# LLAMA_ROPE_TYPE_NONE = -1, |
| 260 | +# LLAMA_ROPE_TYPE_NORM = 0, |
| 261 | +# LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, |
| 262 | +# LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE, |
| 263 | +# LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION, |
262 | 264 | # };
|
263 | 265 | LLAMA_ROPE_TYPE_NONE = -1
|
264 | 266 | LLAMA_ROPE_TYPE_NORM = 0
|
265 | 267 | LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX = 2
|
| 268 | +LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE = 8 |
| 269 | +LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION = 24 |
266 | 270 |
|
267 | 271 |
|
268 | 272 | # enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
|
@@ -1265,6 +1269,7 @@ def llama_rope_freq_scale_train(model: llama_model_p, /) -> float:
|
1265 | 1269 | # // Functions to access the model's GGUF metadata scalar values
|
1266 | 1270 | # // - The functions return the length of the string on success, or -1 on failure
|
1267 | 1271 | # // - The output string is always null-terminated and cleared on failure
|
| 1272 | +# // - When retrieving a string, an extra byte must be allocated to account for the null terminator |
1268 | 1273 | # // - GGUF array values are not supported by these functions
|
1269 | 1274 |
|
1270 | 1275 |
|
@@ -1378,18 +1383,6 @@ def llama_model_n_params(model: llama_model_p, /) -> int:
|
1378 | 1383 | ...
|
1379 | 1384 |
|
1380 | 1385 |
|
1381 |
| -# // Get a llama model tensor |
1382 |
| -# LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name); |
1383 |
| -@ctypes_function( |
1384 |
| - "llama_get_model_tensor", [llama_model_p_ctypes, ctypes.c_char_p], ctypes.c_void_p |
1385 |
| -) |
1386 |
| -def llama_get_model_tensor( |
1387 |
| - model: llama_model_p, name: Union[ctypes.c_char_p, bytes], / |
1388 |
| -) -> ctypes.c_void_p: |
1389 |
| - """Get a llama model tensor""" |
1390 |
| - ... |
1391 |
| - |
1392 |
| - |
1393 | 1386 | # // Returns true if the model contains an encoder that requires llama_encode() call
|
1394 | 1387 | # LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
|
1395 | 1388 | @ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
|
@@ -3336,41 +3329,22 @@ def llama_sampler_init_grammar(
|
3336 | 3329 | ...
|
3337 | 3330 |
|
3338 | 3331 |
|
| 3332 | +# /// NOTE: Avoid using this sampler on the full vocabulary, as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first. |
3339 | 3333 | # LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
|
3340 |
| -# int32_t n_vocab, // llama_n_vocab() |
3341 |
| -# llama_token special_eos_id, // llama_token_eos() |
3342 |
| -# llama_token linefeed_id, // llama_token_nl() |
3343 |
| -# int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) |
3344 |
| -# float penalty_repeat, // 1.0 = disabled |
3345 |
| -# float penalty_freq, // 0.0 = disabled |
3346 |
| -# float penalty_present, // 0.0 = disabled |
3347 |
| -# bool penalize_nl, // consider newlines as a repeatable token |
3348 |
| -# bool ignore_eos); // ignore the end-of-sequence token |
| 3334 | +# int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) |
| 3335 | +# float penalty_repeat, // 1.0 = disabled |
| 3336 | +# float penalty_freq, // 0.0 = disabled |
| 3337 | +# float penalty_present); // 0.0 = disabled |
3349 | 3338 | @ctypes_function(
|
3350 | 3339 | "llama_sampler_init_penalties",
|
3351 |
| - [ |
3352 |
| - ctypes.c_int32, |
3353 |
| - llama_token, |
3354 |
| - llama_token, |
3355 |
| - ctypes.c_int32, |
3356 |
| - ctypes.c_float, |
3357 |
| - ctypes.c_float, |
3358 |
| - ctypes.c_float, |
3359 |
| - ctypes.c_bool, |
3360 |
| - ctypes.c_bool, |
3361 |
| - ], |
| 3340 | + [ctypes.c_int32, ctypes.c_float, ctypes.c_float, ctypes.c_float], |
3362 | 3341 | llama_sampler_p_ctypes,
|
3363 | 3342 | )
|
3364 | 3343 | def llama_sampler_init_penalties(
|
3365 |
| - n_vocab: int, |
3366 |
| - special_eos_id: int, |
3367 |
| - linefeed_id: int, |
3368 | 3344 | penalty_last_n: int,
|
3369 | 3345 | penalty_repeat: float,
|
3370 | 3346 | penalty_freq: float,
|
3371 | 3347 | penalty_present: float,
|
3372 |
| - penalize_nl: bool, |
3373 |
| - ignore_eos: bool, |
3374 | 3348 | /,
|
3375 | 3349 | ) -> llama_sampler_p:
|
3376 | 3350 | ...
|
|
0 commit comments