diff --git a/README.md b/README.md
index 5cd51ece1..f56842d29 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,10 @@
+🚨 Archived Repository Notice
+
+This repository is no longer actively maintained.
+
+Development has moved to menloresearch/llama.cpp.
+
+Please contribute directly to llama.cpp moving forward.
# Cortex
diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc
index 2bba11a7b..74bf0d1b8 100644
--- a/engine/extensions/local-engine/local_engine.cc
+++ b/engine/extensions/local-engine/local_engine.cc
@@ -1,6 +1,9 @@
#include "local_engine.h"
+#include
#include
+#include
#include
+#include
#include
#include "utils/curl_utils.h"
#include "utils/json_helper.h"
@@ -34,6 +37,7 @@ const std::unordered_map<std::string, std::string> kParamsMap = {
{"dynatemp_exponent", "--dynatemp-exp"},
{"ctx_len", "--ctx-size"},
{"ngl", "-ngl"},
+ {"reasoning_budget", "--reasoning-budget"},
};
int GenerateRandomInteger(int min, int max) {
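kParamsMap translates OpenAI-style request fields into llama.cpp server flags, so the new entry lets callers pass reasoning_budget and have it forwarded as --reasoning-budget. A minimal sketch of that lookup with an illustrative consuming loop (the engine's real loop is ConvertJsonToParamsVector below; the loop here is not the actual code path):

    // Sketch: rename request fields into llama.cpp server flags via the map.
    // Map entries mirror the hunk above; request values are examples.
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    int main() {
      const std::unordered_map<std::string, std::string> params_map = {
          {"ctx_len", "--ctx-size"},
          {"ngl", "-ngl"},
          {"reasoning_budget", "--reasoning-budget"},  // new mapping
      };
      const std::vector<std::pair<std::string, std::string>> request = {
          {"ctx_len", "8192"}, {"reasoning_budget", "0"}};
      std::vector<std::string> args;
      for (const auto& [key, value] : request) {
        auto it = params_map.find(key);
        // Fall back to "--<member>" when the key has no explicit mapping.
        args.push_back(it != params_map.end() ? it->second : "--" + key);
        args.push_back(value);
      }
      for (const auto& a : args)
        std::cout << a << ' ';  // --ctx-size 8192 --reasoning-budget 0
      return 0;
    }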
@@ -42,18 +46,26 @@ int GenerateRandomInteger(int min, int max) {
std::uniform_int_distribution<> dis(
min, max); // Distribution for the desired range
- return dis(gen); // Generate and return a random integer within the range
+ return dis(gen);
}
std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
std::vector<std::string> res;
std::string errors;
+ res.push_back("--no-webui");
for (const auto& member : root.getMemberNames()) {
if (member == "model_path" || member == "llama_model_path") {
if (!root[member].isNull()) {
+ const std::string path = root[member].asString();
res.push_back("--model");
- res.push_back(root[member].asString());
+ res.push_back(path);
+
+ // If path contains both "Jan" and "nano", case-insensitive, add special params
+ std::string lowered = path;
+ std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) {
+ return std::tolower(c);
+ });
}
continue;
} else if (kIgnoredParams.find(member) != kIgnoredParams.end()) {
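The hunk above lowercases the model path, but the comparison its comment promises is not visible here, so lowered is left unused in this hunk. A hedged sketch of the check the comment describes; treating --reasoning-budget (registered in kParamsMap above) with a value of 0 as the "special params" is an assumption, not something this diff confirms:

    #include <algorithm>
    #include <cctype>
    #include <string>
    #include <vector>

    // Case-insensitive test matching the comment: the path must contain
    // both "Jan" and "nano".
    bool IsJanNanoModel(const std::string& path) {
      std::string lowered = path;
      std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                     [](unsigned char c) { return std::tolower(c); });
      return lowered.find("jan") != std::string::npos &&
             lowered.find("nano") != std::string::npos;
    }

    // Assumed "special params": the flag and value below are illustrative
    // guesses, not confirmed by the diff.
    void AppendJanNanoParams(std::vector<std::string>& res,
                             const std::string& path) {
      if (IsJanNanoModel(path)) {
        res.push_back("--reasoning-budget");  // assumed flag
        res.push_back("0");                   // assumed value
      }
    }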
@@ -85,8 +97,15 @@ std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
res.push_back("--ignore_eos");
}
continue;
+ } else if (member == "ctx_len") {
+ if (!root[member].isNull()) {
+ res.push_back("--ctx-size");
+ res.push_back(root[member].asString());
+ }
+ continue;
}
+ // Generic handling for other members
res.push_back("--" + member);
if (root[member].isString()) {
res.push_back(root[member].asString());
@@ -105,7 +124,7 @@ std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
ss << "\"" << value.asString() << "\"";
first = false;
}
- ss << "] ";
+ ss << "]";
res.push_back(ss.str());
}
}
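With the ctx_len special case in place, ctx_len is emitted as --ctx-size before the generic fallback can produce a nonexistent --ctx_len flag, and array values are serialized without the old trailing space. An illustrative call, assuming jsoncpp's Json::Value API:

    #include <json/json.h>
    #include <string>
    #include <vector>

    std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root);

    // Illustrative input/output; the exact flag set depends on kParamsMap
    // and kIgnoredParams, and member order follows Json::Value iteration.
    void Example() {
      Json::Value body;
      body["model_path"] = "/models/jan-nano-4b.gguf";  // example path
      body["ctx_len"] = 4096;
      auto args = ConvertJsonToParamsVector(body);
      // args contains "--no-webui", plus "--ctx-size" "4096" and
      // "--model" "/models/jan-nano-4b.gguf".
    }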
@@ -113,6 +132,7 @@ std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
return res;
}
+
constexpr const auto kMinDataChunkSize = 6u;
struct OaiInfo {
@@ -561,8 +581,6 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
params.push_back("--port");
params.push_back(std::to_string(s.port));
- params.push_back("--pooling");
- params.push_back("mean");
params.push_back("--jinja");
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index 68f0fe070..a3771e0a1 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -165,8 +165,8 @@ ModelService::ModelService(std::shared_ptr<DatabaseService> db_service,
download_service_{download_service},
inference_svc_(inference_service),
engine_svc_(engine_svc),
- task_queue_(task_queue) {
- // ProcessBgrTasks();
+ task_queue_(task_queue){
+ // ProcessBgrTasks();
};
void ModelService::ForceIndexingModelList() {
@@ -500,13 +500,10 @@ cpp::result<void, std::string> ModelService::DeleteModel(
std::filesystem::remove(yaml_fp);
CTL_INF("Removed: " << yaml_fp.string());
} else {
- // Remove yaml files
- for (const auto& entry :
- std::filesystem::directory_iterator(yaml_fp.parent_path())) {
- if (entry.is_regular_file() && (entry.path().extension() == ".yml")) {
- std::filesystem::remove(entry);
- CTL_INF("Removed: " << entry.path().string());
- }
+ // Is a local model - Remove only this model's yaml file
+ if (std::filesystem::exists(yaml_fp)) {
+ std::filesystem::remove(yaml_fp);
+ CTL_INF("Removed: " << yaml_fp.string());
}
}
@@ -557,6 +554,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
if (auto& o = params_override["ctx_len"]; !o.isNull()) {
ctx_len = o.asInt();
}
+ Json::Value model_load_params;
+ json_helper::MergeJson(model_load_params, params_override);
try {
constexpr const int kDefautlContextLength = 8192;
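model_load_params is seeded from the caller's params_override up front, and the later hunks copy model_path, mmproj, and model into it explicitly; this replaces the old approach of merging params_override into the fully populated json_data at the end. A rough sketch of the merge semantics this relies on (the real helper is json_helper::MergeJson in utils/json_helper.h; this assumed-equivalent exists only to make the data flow concrete):

    #include <json/json.h>

    // Assumed behavior: copy every member of src into dst, recursing into
    // nested objects, with src winning on conflicts.
    void MergeJsonSketch(Json::Value& dst, const Json::Value& src) {
      for (const auto& key : src.getMemberNames()) {
        if (src[key].isObject() && dst.isMember(key) && dst[key].isObject()) {
          MergeJsonSketch(dst[key], src[key]);
        } else {
          dst[key] = src[key];
        }
      }
    }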
@@ -627,9 +626,14 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
#if defined(_WIN32)
json_data["model_path"] = cortex::wc::WstringToUtf8(
fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
+ model_load_params["model_path"] =
+ cortex::wc::WstringToUtf8(
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
#else
json_data["model_path"] =
fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
+ model_load_params["model_path"] =
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
#endif
} else {
LOG_WARN << "model_path is empty";
@@ -642,6 +646,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
#else
json_data["mmproj"] =
fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
+ model_load_params["mmproj"] =
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
#endif
}
json_data["system_prompt"] = mc.system_template;
@@ -655,6 +661,7 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
}
json_data["model"] = model_handle;
+ model_load_params["model"] = model_handle;
if (auto& cpt = custom_prompt_template; !cpt.value_or("").empty()) {
auto parse_prompt_result = string_utils::ParsePrompt(cpt.value());
json_data["system_prompt"] = parse_prompt_result.system_prompt;
@@ -662,8 +669,6 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
json_data["ai_prompt"] = parse_prompt_result.ai_prompt;
}
- json_helper::MergeJson(json_data, params_override);
-
// Set default cpu_threads if it is not configured
if (!json_data.isMember("cpu_threads")) {
json_data["cpu_threads"] = GetCpuThreads();
@@ -686,12 +691,12 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
assert(!!inference_svc_);
- auto ir =
- inference_svc_->LoadModel(std::make_shared<Json::Value>(json_data));
+ auto ir = inference_svc_->LoadModel(
+ std::make_shared<Json::Value>(model_load_params));
auto status = std::get<0>(ir)["status_code"].asInt();
auto data = std::get<1>(ir);
- if (status == drogon::k200OK) {
+ if (status == drogon::k200OK) {
return StartModelResult{/* .success = */ true,
/* .warning = */ may_fallback_res.value()};
} else if (status == drogon::k409Conflict) {
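Because LoadModel now receives model_load_params rather than the merged json_data, the engine sees only the caller's overrides plus the fields copied in explicitly above (model_path, mmproj, model); prompt-template fields such as system_prompt stay local to json_data. An illustrative payload under those assumptions:

    #include <json/json.h>
    #include <memory>

    // Example shape of the payload handed to LoadModel; keys and values
    // are illustrative, not taken from a real request.
    std::shared_ptr<Json::Value> MakeExamplePayload() {
      auto payload = std::make_shared<Json::Value>();
      (*payload)["ctx_len"] = 4096;                       // from params_override
      (*payload)["model_path"] = "/abs/path/model.gguf";  // copied in above
      (*payload)["model"] = "my-model";                   // copied in above
      return payload;
    }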
@@ -1031,13 +1036,15 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
auto es = hardware::EstimateLLaMACppRun(model_path, rc);
if (!!es && (*es).gpu_mode.vram_MiB > free_vram_MiB && is_cuda) {
- CTL_WRN("Not enough VRAM - " << "required: " << (*es).gpu_mode.vram_MiB
- << ", available: " << free_vram_MiB);
+ CTL_WRN("Not enough VRAM - "
+ << "required: " << (*es).gpu_mode.vram_MiB
+ << ", available: " << free_vram_MiB);
}
if (!!es && (*es).cpu_mode.ram_MiB > free_ram_MiB) {
- CTL_WRN("Not enough RAM - " << "required: " << (*es).cpu_mode.ram_MiB
- << ", available: " << free_ram_MiB);
+ CTL_WRN("Not enough RAM - "
+ << "required: " << (*es).cpu_mode.ram_MiB
+ << ", available: " << free_ram_MiB);
}
return warning;