
Commit 0188482

ddh0 and abetlen authored
Use all available CPUs for batch processing (abetlen#1345)
* Use all cores for n_threads_batch
* Actually use all CPUs
* Update setting for server as well

---------

Co-authored-by: Andrei Betlen <[email protected]>

1 parent a420f96 commit 0188482

2 files changed (+2, -4 lines)


llama_cpp/llama.py (+1, -3)
@@ -262,9 +262,7 @@ def __init__(
 
         self.n_batch = min(n_ctx, n_batch)  # ???
         self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
-        self.n_threads_batch = n_threads_batch or max(
-            multiprocessing.cpu_count() // 2, 1
-        )
+        self.n_threads_batch = n_threads_batch or multiprocessing.cpu_count()
 
         # Context Params
         self.context_params = llama_cpp.llama_context_default_params()
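
The net effect: when the caller does not pass n_threads_batch, batch decoding now defaults to every logical CPU, while generation (n_threads) keeps its half-of-CPUs default. A minimal sketch of how the two defaults resolve, assuming a hypothetical 8-CPU machine (the counts in the comments are illustrative):

import multiprocessing

n_threads = None        # caller did not override
n_threads_batch = None  # caller did not override

# Generation threads: unchanged, still half the logical CPUs (at least 1).
threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)   # e.g. 4 on 8 CPUs

# Batch-processing threads: after this commit, all logical CPUs.
threads_batch = n_threads_batch or multiprocessing.cpu_count()    # e.g. 8 on 8 CPUs

print(threads, threads_batch)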

llama_cpp/server/settings.py (+1, -1)
@@ -70,7 +70,7 @@ class ModelSettings(BaseSettings):
         description="The number of threads to use.",
     )
     n_threads_batch: int = Field(
-        default=max(multiprocessing.cpu_count() // 2, 1),
+        default=max(multiprocessing.cpu_count(), 1),
         ge=0,
         description="The number of threads to use when batch processing.",
     )
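
On the server side the same default lives in the Pydantic settings model, so it is computed once when the module is imported. A minimal self-contained sketch, assuming the pydantic v2 / pydantic-settings stack and reducing ModelSettings to the one field from the diff:

import multiprocessing

from pydantic import Field
from pydantic_settings import BaseSettings

class ModelSettings(BaseSettings):
    # Mirrors the diff above: default to all logical CPUs, floored at 1.
    n_threads_batch: int = Field(
        default=max(multiprocessing.cpu_count(), 1),
        ge=0,
        description="The number of threads to use when batch processing.",
    )

settings = ModelSettings()                   # picks up the cpu_count() default
override = ModelSettings(n_threads_batch=4)  # an explicit value still wins
print(settings.n_threads_batch, override.n_threads_batch)

The max(..., 1) guard here is defensive rather than strictly necessary: multiprocessing.cpu_count() raises NotImplementedError when the count cannot be determined, rather than returning 0.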
