use std::ffi::NulError;
use std::fmt::Debug;
use std::num::NonZeroI32;

use crate::llama_batch::BatchAddError;
use std::os::raw::c_int;
use std::path::PathBuf;
use std::string::FromUtf8Error;

pub mod context;
pub mod llama_backend;
pub mod llama_batch;
mod log;
pub mod model;
#[cfg(feature = "mtmd")]
pub mod mtmd;
pub mod sampling;
pub mod timing;
pub mod token;
pub mod token_type;

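/// A convenience `Result` alias whose error type defaults to [`LLamaCppError`].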
pub type Result<T> = std::result::Result<T, LLamaCppError>;

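/// Errors that can occur in this crate while driving llama.cpp.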
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LLamaCppError {
    #[error("BackendAlreadyInitialized")]
    BackendAlreadyInitialized,
    #[error("{0}")]
    ChatTemplateError(#[from] ChatTemplateError),
    #[error("{0}")]
    DecodeError(#[from] DecodeError),
    #[error("{0}")]
    EncodeError(#[from] EncodeError),
    #[error("{0}")]
    LlamaModelLoadError(#[from] LlamaModelLoadError),
    #[error("{0}")]
    LlamaContextLoadError(#[from] LlamaContextLoadError),
    #[error("{0}")]
    BatchAddError(#[from] BatchAddError),
    #[error(transparent)]
    EmbeddingError(#[from] EmbeddingsError),
    #[error("Backend device {0} not found")]
    BackendDeviceNotFound(usize),
    #[error("Max devices exceeded. Max devices is {0}")]
    MaxDevicesExceeded(usize),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum ChatTemplateError {
    #[error("chat template not found - returned null pointer")]
    MissingTemplate,

    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error(transparent)]
    Utf8Error(#[from] std::str::Utf8Error),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum MetaValError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),

    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),

    #[error("Negative return value. Likely due to a missing index or key. Got return value: {0}")]
    NegativeReturn(i32),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaContextLoadError {
    #[error("null reference from llama.cpp")]
    NullReturn,
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum DecodeError {
    #[error("Decode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Decode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Decode Error {0}: unknown")]
    Unknown(c_int),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EncodeError {
    #[error("Encode Error 1: NoKvCacheSlot")]
    NoKvCacheSlot,
    #[error("Encode Error -1: n_tokens == 0")]
    NTokensZero,
    #[error("Encode Error {0}: unknown")]
    Unknown(c_int),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum EmbeddingsError {
    #[error("Embeddings weren't enabled in the context options")]
    NotEnabled,
    #[error("Logits were not enabled for the given token")]
    LogitsNotEnabled,
    #[error("Can't use sequence embeddings with a model supporting only LLAMA_POOLING_TYPE_NONE")]
    NonePoolType,
}

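/// Maps the non-zero return code of `llama_decode` to a [`DecodeError`]:
/// `1` means no KV cache slot was available, `-1` means no tokens were passed,
/// and any other value is surfaced as [`DecodeError::Unknown`].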
impl From<NonZeroI32> for DecodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => DecodeError::NoKvCacheSlot,
            -1 => DecodeError::NTokensZero,
            i => DecodeError::Unknown(i),
        }
    }
}

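/// Maps the non-zero return code of `llama_encode` to an [`EncodeError`], using the
/// same convention as [`DecodeError`].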
impl From<NonZeroI32> for EncodeError {
    fn from(value: NonZeroI32) -> Self {
        match value.get() {
            1 => EncodeError::NoKvCacheSlot,
            -1 => EncodeError::NTokensZero,
            i => EncodeError::Unknown(i),
        }
    }
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaModelLoadError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterInitError {
    #[error("null byte in string {0}")]
    NullError(#[from] NulError),
    #[error("null result from llama cpp")]
    NullResult,
    #[error("failed to convert path {0} to str")]
    PathToStrError(PathBuf),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterSetError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum LlamaLoraAdapterRemoveError {
    #[error("error code from llama cpp")]
    ErrorResult(i32),
}

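/// Get the current time in microseconds, as reported by llama.cpp's `llama_time_us`.
///
/// Illustrative sketch of timing a piece of work (assumes this crate is built as
/// `llama_cpp_2`):
///
/// ```no_run
/// let start = llama_cpp_2::llama_time_us();
/// // ... do some work ...
/// let elapsed_us = llama_cpp_2::llama_time_us() - start;
/// ```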
#[must_use]
pub fn llama_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::llama_time_us() }
}

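/// The maximum number of devices supported by llama.cpp, as reported by
/// `llama_max_devices`.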
#[must_use]
pub fn max_devices() -> usize {
    unsafe { llama_cpp_sys_2::llama_max_devices() }
}

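/// Whether this build of llama.cpp supports memory-mapped (`mmap`) model loading.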
#[must_use]
pub fn mmap_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mmap() }
}

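/// Whether this build of llama.cpp supports locking model memory into RAM via `mlock`.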
#[must_use]
pub fn mlock_supported() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

#[derive(Debug, thiserror::Error, Clone)]
#[non_exhaustive]
pub enum TokenToStringError {
    #[error("Unknown Token Type")]
    UnknownTokenType,
    #[error("Insufficient Buffer Space {0}")]
    InsufficientBufferSpace(c_int),
    #[error("FromUtf8Error {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

#[derive(Debug, thiserror::Error)]
pub enum StringToTokenError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    CIntConversionError(#[from] std::num::TryFromIntError),
}

#[derive(Debug, thiserror::Error)]
pub enum NewLlamaChatMessageError {
    #[error("{0}")]
    NulError(#[from] NulError),
}

#[derive(Debug, thiserror::Error)]
pub enum ApplyChatTemplateError {
    #[error("{0}")]
    NulError(#[from] NulError),
    #[error("{0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}

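/// Get the current time in microseconds, as reported by ggml's `ggml_time_us`.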
#[must_use]
pub fn ggml_time_us() -> i64 {
    unsafe { llama_cpp_sys_2::ggml_time_us() }
}

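/// Whether this build of llama.cpp supports `mlock`. This is a direct wrapper over
/// `llama_supports_mlock` and matches [`mlock_supported`].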
#[must_use]
pub fn llama_supports_mlock() -> bool {
    unsafe { llama_cpp_sys_2::llama_supports_mlock() }
}

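/// The kind of device exposed by a ggml backend.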
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LlamaBackendDeviceType {
    Cpu,
    Accelerator,
    Gpu,
    IntegratedGpu,
    Unknown,
}

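/// A device reported by the ggml backend registry, together with the backend that owns
/// it, its memory figures (in bytes), and its device type.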
#[derive(Debug, Clone)]
pub struct LlamaBackendDevice {
    pub index: usize,
    pub name: String,
    pub description: String,
    pub backend: String,
    pub memory_total: usize,
    pub memory_free: usize,
    pub device_type: LlamaBackendDeviceType,
}

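/// Enumerates the devices currently registered with ggml, including the backend that
/// owns each one and how much memory it reports as free and total.
///
/// Illustrative sketch of listing devices (assumes this crate is built as `llama_cpp_2`):
///
/// ```no_run
/// for dev in llama_cpp_2::list_llama_ggml_backend_devices() {
///     println!(
///         "[{}] {} ({}) - {:?}, {}/{} bytes free",
///         dev.index, dev.name, dev.backend, dev.device_type, dev.memory_free, dev.memory_total
///     );
/// }
/// ```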
#[must_use]
pub fn list_llama_ggml_backend_devices() -> Vec<LlamaBackendDevice> {
    // Convert a possibly-null C string into an owned `String` (empty if null).
    fn cstr_to_string(ptr: *const std::os::raw::c_char) -> String {
        if ptr.is_null() {
            String::new()
        } else {
            unsafe { std::ffi::CStr::from_ptr(ptr) }
                .to_string_lossy()
                .to_string()
        }
    }

    let mut devices = Vec::new();
    for i in 0..unsafe { llama_cpp_sys_2::ggml_backend_dev_count() } {
        let dev = unsafe { llama_cpp_sys_2::ggml_backend_dev_get(i) };
        let props = unsafe {
            let mut props = std::mem::zeroed();
            llama_cpp_sys_2::ggml_backend_dev_get_props(dev, &raw mut props);
            props
        };
        let name = cstr_to_string(props.name);
        let description = cstr_to_string(props.description);
        let backend = unsafe { llama_cpp_sys_2::ggml_backend_dev_backend_reg(dev) };
        let backend_name = unsafe { llama_cpp_sys_2::ggml_backend_reg_name(backend) };
        let backend = cstr_to_string(backend_name);
        let memory_total = props.memory_total;
        let memory_free = props.memory_free;
        let device_type = match props.type_ {
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_CPU => LlamaBackendDeviceType::Cpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_ACCEL => LlamaBackendDeviceType::Accelerator,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_GPU => LlamaBackendDeviceType::Gpu,
            llama_cpp_sys_2::GGML_BACKEND_DEVICE_TYPE_IGPU => LlamaBackendDeviceType::IntegratedGpu,
            _ => LlamaBackendDeviceType::Unknown,
        };
        devices.push(LlamaBackendDevice {
            index: i,
            name,
            description,
            backend,
            memory_total,
            memory_free,
            device_type,
        });
    }
    devices
}

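/// Options for forwarding llama.cpp and ggml log output to the `tracing` crate.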
#[derive(Default, Debug, Clone)]
pub struct LogOptions {
    disabled: bool,
}

impl LogOptions {
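    /// Enables or disables forwarding of log lines. With the default options, logs are
    /// not disabled.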
    pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
        self.disabled = !enabled;
        self
    }
}

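/// The C log callback registered with llama.cpp and ggml. It filters messages by level,
/// buffers lines that arrive without a trailing newline, and stitches
/// `GGML_LOG_LEVEL_CONT` continuation messages onto the buffered line before the state
/// emits anything.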
extern "C" fn logs_to_trace(
    level: llama_cpp_sys_2::ggml_log_level,
    text: *const ::std::os::raw::c_char,
    data: *mut ::std::os::raw::c_void,
) {
    use std::borrow::Borrow;

    let log_state = unsafe { &*(data as *const log::State) };

    if log_state.options.disabled {
        return;
    }

    if !log_state.is_enabled_for_level(level) {
        log_state.update_previous_level_for_disabled_log(level);
        return;
    }

    let text = unsafe { std::ffi::CStr::from_ptr(text) };
    let text = text.to_string_lossy();
    let text: &str = text.borrow();

    if level == llama_cpp_sys_2::GGML_LOG_LEVEL_CONT {
        log_state.cont_buffered_log(text);
    } else if text.ends_with('\n') {
        log_state.emit_non_cont_line(level, text);
    } else {
        log_state.buffer_non_cont(level, text);
    }
}

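/// Redirects llama.cpp and ggml log output to the `tracing` crate.
///
/// The logging state is created once via `get_or_init`, so the [`LogOptions`] passed on
/// the first call are the ones that take effect.
///
/// Illustrative sketch (assumes this crate is built as `llama_cpp_2`):
///
/// ```no_run
/// use llama_cpp_2::{send_logs_to_tracing, LogOptions};
///
/// // Forward all llama.cpp / ggml log lines to the active `tracing` subscriber.
/// send_logs_to_tracing(LogOptions::default().with_logs_enabled(true));
/// ```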
pub fn send_logs_to_tracing(options: LogOptions) {
    let llama_heap_state = Box::as_ref(
        log::LLAMA_STATE
            .get_or_init(|| Box::new(log::State::new(log::Module::LlamaCpp, options.clone()))),
    ) as *const _;
    let ggml_heap_state = Box::as_ref(
        log::GGML_STATE.get_or_init(|| Box::new(log::State::new(log::Module::GGML, options))),
    ) as *const _;

    unsafe {
        llama_cpp_sys_2::llama_log_set(Some(logs_to_trace), llama_heap_state as *mut _);
        llama_cpp_sys_2::ggml_log_set(Some(logs_to_trace), ggml_heap_state as *mut _);
    }
}