Commit 9769e57

feat: Update llama.cpp
1 parent dcb0d0c · commit 9769e57

File tree

3 files changed: +5 -5 lines changed


llama_cpp/llama.py

+1 -1
@@ -153,7 +153,7 @@ def __init__(
             model_path: Path to the model.
             n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
             split_mode: How to split the model across GPUs. See llama_cpp.LLAMA_SPLIT_* for options.
-            main_gpu: main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model. LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results. LLAMA_SPLIT_LAYER: ignored
+            main_gpu: main_gpu interpretation depends on split_mode: LLAMA_SPLIT_MODE_NONE: the GPU that is used for the entire model. LLAMA_SPLIT_MODE_ROW: the GPU that is used for small tensors and intermediate results. LLAMA_SPLIT_MODE_LAYER: ignored
             tensor_split: How split tensors should be distributed across GPUs. If None, the model is not split.
             rpc_servers: Comma separated list of RPC servers to use for offloading
             vocab_only: Only load the vocabulary no weights.
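
For orientation (not part of the diff), a minimal sketch of how the renamed constants are meant to be used from the high-level API. The model path and the two-GPU tensor_split are illustrative assumptions, and it assumes the LLAMA_SPLIT_MODE_* constants are importable from the llama_cpp package, as the docstring's llama_cpp.LLAMA_SPLIT_* note suggests.

import llama_cpp
from llama_cpp import Llama

# Offload every layer and split them across GPUs; with
# LLAMA_SPLIT_MODE_LAYER the main_gpu value is ignored and
# tensor_split controls the per-GPU proportions.
llm = Llama(
    model_path="./models/example.gguf",           # hypothetical path
    n_gpu_layers=-1,                              # -1 = offload all layers
    split_mode=llama_cpp.LLAMA_SPLIT_MODE_LAYER,
    main_gpu=0,
    tensor_split=[0.5, 0.5],                      # assumes two GPUs
)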

llama_cpp/llama_cpp.py

+3 -3
@@ -711,9 +711,9 @@ class llama_model_kv_override(ctypes.Structure):
 # enum llama_split_mode split_mode; // how to split the model across multiple GPUs

 # // main_gpu interpretation depends on split_mode:
-# // LLAMA_SPLIT_NONE: the GPU that is used for the entire model
-# // LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results
-# // LLAMA_SPLIT_LAYER: ignored
+# // LLAMA_SPLIT_MODE_NONE: the GPU that is used for the entire model
+# // LLAMA_SPLIT_MODE_ROW: the GPU that is used for small tensors and intermediate results
+# // LLAMA_SPLIT_MODE_LAYER: ignored
 # int32_t main_gpu;

 # // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
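
The same fields exist on the low-level llama_model_params struct that these comments document. A small sketch, assuming the renamed LLAMA_SPLIT_MODE_* constants are exposed by the bindings after this change; the GPU index and layer count are illustrative.

import llama_cpp

# Start from the library defaults and pin the whole model to one GPU.
params = llama_cpp.llama_model_default_params()
params.n_gpu_layers = 99                               # offload everything that fits
params.split_mode = llama_cpp.LLAMA_SPLIT_MODE_NONE    # no multi-GPU split
params.main_gpu = 1                                    # LLAMA_SPLIT_MODE_NONE: model lives on GPU 1
# Under LLAMA_SPLIT_MODE_LAYER, main_gpu would be ignored; under
# LLAMA_SPLIT_MODE_ROW it selects the GPU for small tensors and
# intermediate results, matching the comment above.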

vendor/llama.cpp
