
Commit 76aafa6

phiharri and abetlen authored

Implement GGUF metadata KV overrides (abetlen#1011)

* Implement GGUF metadata overrides
* whitespace fix
* Fix kv overrides.
* Fix pointer and pickle
* Match llama.cpp kv_overrides cli argument

Co-authored-by: Andrei <[email protected]>

1 parent 7eff42c · commit 76aafa6

File tree

3 files changed: +55 −1 lines changed

llama_cpp/llama.py (+32)

@@ -735,6 +735,7 @@ def __init__(
         vocab_only: bool = False,
         use_mmap: bool = True,
         use_mlock: bool = False,
+        kv_overrides: Optional[Dict[str, Union[bool, int, float]]] = None,
         # Context Params
         seed: int = llama_cpp.LLAMA_DEFAULT_SEED,
         n_ctx: int = 512,
@@ -803,6 +804,7 @@ def __init__(
             vocab_only: Only load the vocabulary, no weights.
             use_mmap: Use mmap if possible.
             use_mlock: Force the system to keep the model in RAM.
+            kv_overrides: Key-value overrides for the model.
             seed: RNG seed, -1 for random
             n_ctx: Text context, 0 = from model
             n_batch: Prompt processing maximum batch size
@@ -866,6 +868,34 @@ def __init__(
         self.model_params.use_mmap = use_mmap if lora_path is None else False
         self.model_params.use_mlock = use_mlock

+        self.kv_overrides = kv_overrides
+        if kv_overrides is not None:
+            n_overrides = len(kv_overrides)
+            # Instantiate the ctypes array type (note the trailing parentheses);
+            # the extra slot holds the sentinel entry.
+            self._kv_overrides_array = (
+                llama_cpp.llama_model_kv_override * (n_overrides + 1)
+            )()
+            self._kv_overrides_array_keys = []
+
+            for i, (k, v) in enumerate(kv_overrides.items()):
+                key_buf = ctypes.create_string_buffer(k.encode("utf-8"))
+                # Keep a Python reference so the key buffer outlives this loop.
+                self._kv_overrides_array_keys.append(key_buf)
+                self._kv_overrides_array[i].key = key_buf
+                # Test bool before int: bool is a subclass of int in Python,
+                # so the int branch would otherwise capture True and False.
+                if isinstance(v, bool):
+                    self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_BOOL
+                    self._kv_overrides_array[i].value.bool_value = v
+                elif isinstance(v, int):
+                    self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_INT
+                    self._kv_overrides_array[i].value.int_value = v
+                elif isinstance(v, float):
+                    self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_FLOAT
+                    self._kv_overrides_array[i].value.float_value = v
+                else:
+                    raise ValueError(f"Unknown value type for {k}: {v}")
+
+            self._kv_overrides_array_sentinel_key = b'\0'
+
+            # null array sentinel
+            self._kv_overrides_array[n_overrides].key = self._kv_overrides_array_sentinel_key
+            self.model_params.kv_overrides = self._kv_overrides_array
+
         self.n_batch = min(n_ctx, n_batch)  # ???
         self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
         self.n_threads_batch = n_threads_batch or max(
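The block above builds the sentinel-terminated C array that llama.cpp consumes: model_params.kv_overrides points at n + 1 llama_model_kv_override entries, and the extra entry's empty key tells the C side where the array ends, so no separate length is passed. Below is a minimal, self-contained sketch of the same ctypes pattern; KVOverride and the keys are hypothetical stand-ins for illustration, not the real binding:

    import ctypes

    # Hypothetical stand-in for llama.cpp's llama_model_kv_override struct;
    # the real binding also carries a tag and a union of typed values.
    class KVOverride(ctypes.Structure):
        _fields_ = [
            ("key", ctypes.c_char * 128),   # fixed-size key; empty key marks the end
            ("int_value", ctypes.c_int64),
        ]

    overrides = {"example.n_layers": 32, "example.n_heads": 8}  # illustrative keys

    # Allocate n + 1 slots; the trailing () instantiates the array type.
    arr = (KVOverride * (len(overrides) + 1))()
    for i, (k, v) in enumerate(overrides.items()):
        arr[i].key = k.encode("utf-8")  # bytes shorter than 128 are null-padded
        arr[i].int_value = v
    arr[len(overrides)].key = b"\0"     # sentinel: empty key terminates the array

    # Walk the array the way a C consumer would, stopping at the sentinel.
    i = 0
    while arr[i].key != b"":
        print(arr[i].key.decode("utf-8"), "=", arr[i].int_value)
        i += 1

Holding Python references to the key buffers (as _kv_overrides_array_keys does in the commit) matters because ctypes does not keep pointed-to memory alive on its own.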
@@ -2148,6 +2178,7 @@ def __getstate__(self):
             vocab_only=self.model_params.vocab_only,
             use_mmap=self.model_params.use_mmap,
             use_mlock=self.model_params.use_mlock,
+            kv_overrides=self.kv_overrides,
             # Context Params
             seed=self.context_params.seed,
             n_ctx=self.context_params.n_ctx,
@@ -2190,6 +2221,7 @@ def __setstate__(self, state):
             vocab_only=state["vocab_only"],
            use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
+            kv_overrides=state["kv_overrides"],
             # Context Params
             seed=state["seed"],
             n_ctx=state["n_ctx"],
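With __getstate__ and __setstate__ both carrying the new field, kv_overrides survives a pickle round trip. From the caller's side, the constructor takes an already-typed dictionary; a brief usage sketch, where the model path and metadata keys are placeholders rather than requirements of the API:

    from llama_cpp import Llama

    llm = Llama(
        model_path="./models/example-7b.Q4_K_M.gguf",  # placeholder path
        kv_overrides={
            "tokenizer.ggml.add_bos_token": False,  # bool -> LLAMA_KV_OVERRIDE_BOOL
            "llama.context_length": 4096,           # int  -> LLAMA_KV_OVERRIDE_INT
        },
    )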

llama_cpp/server/model.py (+19 −1)

@@ -1,6 +1,6 @@
 from __future__ import annotations

-from typing import Optional, Union, List
+from typing import Dict, Optional, Union, List

 import llama_cpp
@@ -71,6 +71,23 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(
             clip_model_path=settings.clip_model_path, verbose=settings.verbose
         )
+
+    kv_overrides: Optional[Dict[str, Union[bool, int, float]]] = None
+    if settings.kv_overrides is not None:
+        assert isinstance(settings.kv_overrides, list)
+        kv_overrides = {}
+        for kv in settings.kv_overrides:
+            key, value = kv.split("=")
+            if ":" in value:
+                value_type, value = value.split(":")
+                if value_type == "bool":
+                    kv_overrides[key] = value.lower() in ["true", "1"]
+                elif value_type == "int":
+                    kv_overrides[key] = int(value)
+                elif value_type == "float":
+                    kv_overrides[key] = float(value)
+                else:
+                    raise ValueError(f"Unknown value type {value_type}")

     _model = llama_cpp.Llama(
         model_path=settings.model,
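The parsing loop accepts llama.cpp-style KEY=TYPE:VALUE strings. One subtlety worth noting: an entry without a TYPE: prefix is silently skipped, because assignment only happens inside the `if ":" in value:` branch. A standalone sketch that mirrors the same logic, with illustrative GGUF metadata keys:

    from typing import Dict, List, Union

    def parse_kv_overrides(items: List[str]) -> Dict[str, Union[bool, int, float]]:
        # Mirrors the loop above, including the silent skip of untyped values.
        out: Dict[str, Union[bool, int, float]] = {}
        for kv in items:
            key, value = kv.split("=")
            if ":" in value:
                value_type, value = value.split(":")
                if value_type == "bool":
                    out[key] = value.lower() in ["true", "1"]
                elif value_type == "int":
                    out[key] = int(value)
                elif value_type == "float":
                    out[key] = float(value)
                else:
                    raise ValueError(f"Unknown value type {value_type}")
        return out

    print(parse_kv_overrides(["llama.context_length=int:4096",
                              "tokenizer.ggml.add_bos_token=bool:false"]))
    # {'llama.context_length': 4096, 'tokenizer.ggml.add_bos_token': False}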
@@ -81,6 +98,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         vocab_only=settings.vocab_only,
         use_mmap=settings.use_mmap,
         use_mlock=settings.use_mlock,
+        kv_overrides=kv_overrides,
         # Context Params
         seed=settings.seed,
         n_ctx=settings.n_ctx,

llama_cpp/server/settings.py (+4)

@@ -48,6 +48,10 @@ class ModelSettings(BaseSettings):
         default=llama_cpp.llama_mlock_supported(),
         description="Use mlock.",
     )
+    kv_overrides: Optional[List[str]] = Field(
+        default=None,
+        description="List of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.",
+    )
     # Context Params
     seed: int = Field(
         default=llama_cpp.LLAMA_DEFAULT_SEED, description="Random seed. -1 for random."
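End to end, the new setting can be exercised without the CLI by constructing the settings object directly; a sketch assuming only the names shown in this diff, with a placeholder model path:

    from llama_cpp.server.model import load_llama_from_model_settings
    from llama_cpp.server.settings import ModelSettings

    settings = ModelSettings(
        model="./models/example-7b.Q4_K_M.gguf",          # placeholder path
        kv_overrides=["llama.context_length=int:4096"],   # key=type:value format
    )
    llm = load_llama_from_model_settings(settings)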
