File tree 1 file changed +2
-1
lines changed
1 file changed +2
-1
lines changed Original file line number Diff line number Diff line change @@ -239,6 +239,7 @@ def __init__(
239
239
n_ctx: Maximum context size.
240
240
n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
241
241
seed: Random seed. -1 for random.
242
+ n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
242
243
f16_kv: Use half-precision for key/value cache.
243
244
logits_all: Return logits for all tokens, not just the last token.
244
245
vocab_only: Only load the vocabulary, no weights.
@@ -267,7 +268,7 @@ def __init__(
267
268
268
269
self .params = llama_cpp .llama_context_default_params ()
269
270
self .params .n_ctx = n_ctx
270
- self .params .n_gpu_layers = n_gpu_layers
271
+ self .params .n_gpu_layers = 0x7FFFFFFF if n_gpu_layers == - 1 else n_gpu_layers # 0x7FFFFFFF is INT32 max, will be auto set to all layers
271
272
self .params .seed = seed
272
273
self .params .f16_kv = f16_kv
273
274
self .params .logits_all = logits_all
You can’t perform that action at this time.
0 commit comments