Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 08e910f

Browse files
committed
feat: Update llama.cpp
1 parent a728199 commit 08e910f

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

llama_cpp/llama_cpp.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,6 +1728,17 @@ def llama_set_n_threads(
17281728
"""
17291729
...
17301730

1731+
1732+
# // Set whether to use causal attention or not
# // If set to true, the model will only attend to the past tokens
# LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
@ctypes_function("llama_set_causal_attn", [llama_context_p_ctypes, ctypes.c_bool], None)
def llama_set_causal_attn(ctx: llama_context_p, causal_attn: bool, /):
    """Set whether to use causal attention or not.

    If set to true, the model will only attend to the past tokens.

    Args:
        ctx: The llama context.
        causal_attn: True to enable causal (autoregressive) attention,
            False to disable it.
    """
    ...
1740+
1741+
17311742
# // Set abort callback
17321743
# LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
17331744
@ctypes_function(

vendor/llama.cpp

0 commit comments

Comments
 (0)