
Commit 0188482

ddh0 and abetlen authored
Use all available CPUs for batch processing (abetlen#1345)
* Use all cores for n_threads_batch
* Actually use all CPUs
* Update setting for server as well

---------

Co-authored-by: Andrei Betlen <[email protected]>

1 parent a420f96 commit 0188482

2 files changed (+2, -4 lines)


llama_cpp/llama.py (+1, -3)
@@ -262,9 +262,7 @@ def __init__(
 
         self.n_batch = min(n_ctx, n_batch)  # ???
         self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
-        self.n_threads_batch = n_threads_batch or max(
-            multiprocessing.cpu_count() // 2, 1
-        )
+        self.n_threads_batch = n_threads_batch or multiprocessing.cpu_count()
 
         # Context Params
         self.context_params = llama_cpp.llama_context_default_params()
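
The net effect: when the caller does not pass n_threads_batch, batch decoding now defaults to every logical CPU, while generation (n_threads) keeps its half-of-CPUs default. A minimal sketch of how the two defaults resolve, assuming a hypothetical 8-CPU machine (the counts in the comments are illustrative):

import multiprocessing

n_threads = None        # caller did not override
n_threads_batch = None  # caller did not override

# Generation threads: unchanged, still half the logical CPUs (at least 1).
threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)   # e.g. 4 on 8 CPUs

# Batch-processing threads: after this commit, all logical CPUs.
threads_batch = n_threads_batch or multiprocessing.cpu_count()    # e.g. 8 on 8 CPUs

print(threads, threads_batch)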

llama_cpp/server/settings.py (+1, -1)
@@ -70,7 +70,7 @@ class ModelSettings(BaseSettings):
         description="The number of threads to use.",
     )
     n_threads_batch: int = Field(
-        default=max(multiprocessing.cpu_count() // 2, 1),
+        default=max(multiprocessing.cpu_count(), 1),
         ge=0,
         description="The number of threads to use when batch processing.",
     )
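
On the server side the same default lives in the Pydantic settings model, so it is computed once when the module is imported. A minimal self-contained sketch, assuming the pydantic v2 / pydantic-settings stack and reducing ModelSettings to the one field from the diff:

import multiprocessing

from pydantic import Field
from pydantic_settings import BaseSettings

class ModelSettings(BaseSettings):
    # Mirrors the diff above: default to all logical CPUs, floored at 1.
    n_threads_batch: int = Field(
        default=max(multiprocessing.cpu_count(), 1),
        ge=0,
        description="The number of threads to use when batch processing.",
    )

settings = ModelSettings()                   # picks up the cpu_count() default
override = ModelSettings(n_threads_batch=4)  # an explicit value still wins
print(settings.n_threads_batch, override.n_threads_batch)

The max(..., 1) guard here is defensive rather than strictly necessary: multiprocessing.cpu_count() raises NotImplementedError when the count cannot be determined, rather than returning 0.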
