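Enable multi-threading support: apply the configured thread counts (n_threads, n_threads_batch) to the llama context in llama_predict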

markcda · markcda · commit bb4c97967bb8 · 2023-11-06T21:13:28.000+03:00
diff --git a/binding.cpp b/binding.cpp
@@ -142,6 +142,8 @@ int llama_predict(void *params_ptr, void *state_pr, char *result, bool debug)
 {
     gpt_params *params_p = (gpt_params *)params_ptr;
     llama_context *ctx = (llama_context *)state_pr;
+    
+    llama_set_n_threads(ctx, params_p->n_threads, params_p->n_threads_batch);
 
     const int n_ctx = llama_n_ctx(ctx);
 
@@ -631,6 +633,7 @@ void *llama_allocate_params(const char *prompt, int seed, int threads, int token
     gpt_params *params = new gpt_params;
     params->seed = seed;
     params->n_threads = threads;
+    params->n_threads_batch = threads;
     params->n_predict = tokens;
     params->repeat_last_n = repeat_last_n;
     params->prompt_cache_ro = prompt_cache_ro;