From 005e2684e743688df320264bba2bdc00aa99abf2 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 11 Apr 2025 10:59:07 +0700 Subject: [PATCH 01/12] fix: handle more parameters (#2199) * fix: handle more parameters * fix: generate version.txt if not exist * fix: only add path if exists --- engine/extensions/local-engine/local_engine.cc | 15 ++++++++++++++- engine/services/engine_service.cc | 12 ++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc index 885c14d77..b769c5e8c 100644 --- a/engine/extensions/local-engine/local_engine.cc +++ b/engine/extensions/local-engine/local_engine.cc @@ -20,7 +20,7 @@ const std::unordered_set kIgnoredParams = { "user_prompt", "min_keep", "mirostat", "mirostat_eta", "mirostat_tau", "text_model", "version", "n_probs", "object", "penalize_nl", "precision", "size", - "stop", "tfs_z", "typ_p"}; + "stop", "tfs_z", "typ_p", "caching_enabled"}; const std::unordered_map kParamsMap = { {"cpu_threads", "--threads"}, @@ -67,6 +67,19 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) { res.push_back("--embedding"); } continue; + } else if (member == "cache_type") { + if (!root[member].isNull()) { + res.push_back("-ctk"); + res.push_back(root[member].asString()); + res.push_back("-ctv"); + res.push_back(root[member].asString()); + } + continue; + } else if (member == "use_mmap") { + if (!root[member].asBool()) { + res.push_back("--no-mmap"); + } + continue; } res.push_back("--" + member); diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 89cd00058..15c7148c7 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -772,7 +772,13 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const { // try to find version.txt auto version_txt_path = version_entry.path() / "version.txt"; if (!std::filesystem::exists(version_txt_path)) { - continue; + // create new one + std::ofstream meta(version_txt_path, std::ios::out); + meta << "name: " << entry.path().filename() << std::endl; + meta << "version: " << version_entry.path().filename() << std::endl; + meta.close(); + CTL_INF("name: " << entry.path().filename().string() << ", version: " + << version_entry.path().filename().string()); } try { @@ -865,7 +871,9 @@ void EngineService::RegisterEngineLibPath() { // register deps std::vector paths{}; - paths.push_back(cuda_path); + if (std::filesystem::exists(cuda_path)) { + paths.push_back(cuda_path); + } paths.push_back(engine_dir_path); CTL_DBG("Registering dylib for " From 7fda1868620c9e05e205ddde4f8e51fde5baaa71 Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 11 Apr 2025 11:10:41 +0700 Subject: [PATCH 02/12] chore: separate mac binary (#2198) --- .github/workflows/template-build-macos.yml | 72 ++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml index 038546097..ea96d2df6 100644 --- a/.github/workflows/template-build-macos.yml +++ b/.github/workflows/template-build-macos.yml @@ -253,6 +253,14 @@ jobs: cd engine make codesign-binary CODE_SIGN=true DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" DESTINATION_BINARY_NAME="${{ steps.set-output-params.outputs.destination_binary_name }}" DESTINATION_BINARY_SERVER_NAME="${{ steps.set-output-params.outputs.destination_binary_server_name }}" + - name: Code Signing binaries 
for separate binary + run: | + codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_name }} + codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_server_name }} + + codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_name }} + codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_server_name }} + - name: Notary macOS Binary run: | curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sh -s -- -b /usr/local/bin @@ -265,6 +273,18 @@ jobs: QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }} QUILL_NOTARY_KEY: "/tmp/notary-key.p8" + - name: Notary macOS Binary for separate binary + run: | + # Notarize the binary + quill notarize ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_name }} + quill notarize ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_server_name }} + quill notarize ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_name }} + quill notarize ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_server_name }} + env: + QUILL_NOTARY_KEY_ID: ${{ secrets.NOTARY_KEY_ID }} + QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }} + QUILL_NOTARY_KEY: "/tmp/notary-key.p8" + - name: Build network Installers shell: bash run: | @@ -310,6 +330,24 @@ jobs: xcrun notarytool submit ${{ steps.set-output-params.outputs.package_name }}-local.pkg --apple-id ${{ secrets.APPLE_ID }} --password ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} --team-id ${{ secrets.APPLE_TEAM_ID }} --wait - name: Package + run: | + mkdir temp + # Mac arm64 + mv cortex-${{ inputs.new_version }}-mac-arm64 temp/cortex + cd temp + tar -czvf cortex-arm64.tar.gz cortex + mv cortex-arm64.tar.gz ../cortex-arm64.tar.gz + cd .. + rm -rf temp/cortex + + # Mac amd64 + mv cortex-${{ inputs.new_version }}-mac-amd64 temp/cortex + cd temp + tar -czvf cortex-amd64.tar.gz cortex + mv cortex-amd64.tar.gz ../cortex-amd64.tar.gz + cd .. 
+ + - name: Package for separate binary run: | cd engine make package @@ -320,6 +358,18 @@ jobs: name: cortex-${{ inputs.new_version }}-mac-universal path: ./engine/cortex + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: cortex-${{ inputs.new_version }}-mac-arm64-signed + path: ./cortex-${{ inputs.new_version }}-mac-arm64 + + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: cortex-${{ inputs.new_version }}-mac-amd64-signed + path: ./cortex-${{ inputs.new_version }}-mac-amd64 + - name: Upload Artifact uses: actions/upload-artifact@v4 with: @@ -358,6 +408,28 @@ jobs: asset_name: cortex-${{ inputs.new_version }}-mac-universal.tar.gz asset_content_type: application/zip + - name: Upload release assert if public provider is github + if: inputs.public_provider == 'github' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + uses: actions/upload-release-asset@v1.0.1 + with: + upload_url: ${{ inputs.upload_url }} + asset_path: ./cortex-arm64.tar.gz + asset_name: cortex-${{ inputs.new_version }}-mac-arm64.tar.gz + asset_content_type: application/zip + + - name: Upload release assert if public provider is github + if: inputs.public_provider == 'github' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + uses: actions/upload-release-asset@v1.0.1 + with: + upload_url: ${{ inputs.upload_url }} + asset_path: ./cortex-amd64.tar.gz + asset_name: cortex-${{ inputs.new_version }}-mac-amd64.tar.gz + asset_content_type: application/zip + - name: Upload release assert if public provider is github if: inputs.public_provider == 'github' env: From 3d74607cdd7abf431ed2c78fbb15760c4f786591 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 6 May 2025 00:42:08 -0700 Subject: [PATCH 03/12] chore: function calling cleanup (#2195) * chore: function calling cleanup * chore: cleanup --------- Co-authored-by: sangjanai --- docs/docs/guides/function-calling.md | 30 ++- engine/controllers/server.cc | 1 - .../extensions/local-engine/local_engine.cc | 1 + engine/services/inference_service.cc | 47 ----- engine/services/model_service.cc | 26 +-- engine/services/model_service.h | 9 - .../test/components/test_function_calling.cc | 157 ---------------- engine/utils/cli_selection_utils.h | 26 +-- engine/utils/function_calling/common.h | 153 ---------------- function-calling.py | 173 ++++++++++++++++++ 10 files changed, 214 insertions(+), 409 deletions(-) delete mode 100644 engine/test/components/test_function_calling.cc create mode 100644 function-calling.py diff --git a/docs/docs/guides/function-calling.md b/docs/docs/guides/function-calling.md index 6b9157f18..7725f225d 100644 --- a/docs/docs/guides/function-calling.md +++ b/docs/docs/guides/function-calling.md @@ -63,8 +63,14 @@ tools = [ completion_payload = { "messages": [ - {"role": "system", "content": "You are a helpful customer support assistant. 
Use the supplied tools to assist the user."}, - {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"}, + { + "role": "system", + "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.', + }, + { + "role": "user", + "content": "Hi, can you tell me the delivery date for my order?" + }, ] } @@ -126,10 +132,22 @@ Once the user provides their order ID: ```python completion_payload = { "messages": [ - {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."}, - {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"}, - {"role": "assistant", "content": "Of course! Please provide your order ID so I can look it up."}, - {"role": "user", "content": "i think it is order_70705"}, + { + "role": "system", + "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.', + }, + { + "role": "user", + "content": "Hi, can you tell me the delivery date for my order?" + }, + { + "role": "assistant", + "content": "Of course! Please provide your order ID so I can look it up." 
+ }, + { + "role": "user", + "content": "i think it is order_70705" + }, ] } diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 6ea733a70..3ba4aa327 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -179,7 +179,6 @@ void server::ProcessStreamRes(std::function cb, void server::ProcessNonStreamRes(std::function cb, SyncQueue& q) { auto [status, res] = q.wait_and_pop(); - function_calling_utils::PostProcessResponse(res); LOG_DEBUG << "response: " << res.toStyledString(); auto resp = cortex_utils::CreateCortexHttpJsonResponse(res); resp->setStatusCode( diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc index b769c5e8c..c4d296427 100644 --- a/engine/extensions/local-engine/local_engine.cc +++ b/engine/extensions/local-engine/local_engine.cc @@ -544,6 +544,7 @@ void LocalEngine::LoadModel(std::shared_ptr json_body, params.push_back("--pooling"); params.push_back("mean"); + params.push_back("--jinja"); std::vector v; v.reserve(params.size() + 1); diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 75d95f06d..e07ed71ba 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -13,8 +13,6 @@ cpp::result InferenceService::HandleChatCompletion( engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); } CTL_DBG("engine_type: " << engine_type); - function_calling_utils::PreprocessRequest(json_body); - CTL_DBG("engine_type: " << engine_type); auto tool_choice = json_body->get("tool_choice", Json::Value::null); auto model_id = json_body->get("model", "").asString(); if (saved_models_.find(model_id) != saved_models_.end()) { @@ -46,51 +44,6 @@ cpp::result InferenceService::HandleChatCompletion( return cpp::fail(std::make_pair(stt, res)); } - if (!model_id.empty()) { - if (auto model_service = model_service_.lock()) { - auto metadata_ptr = model_service->GetCachedModelMetadata(model_id); - if (metadata_ptr != nullptr && - !metadata_ptr->tokenizer->chat_template.empty()) { - auto tokenizer = metadata_ptr->tokenizer; - auto messages = (*json_body)["messages"]; - Json::Value messages_jsoncpp(Json::arrayValue); - for (auto message : messages) { - messages_jsoncpp.append(message); - } - - Json::Value tools(Json::arrayValue); - Json::Value template_data_json; - template_data_json["messages"] = messages_jsoncpp; - // template_data_json["tools"] = tools; - - auto prompt_result = jinja::RenderTemplate( - tokenizer->chat_template, template_data_json, tokenizer->bos_token, - tokenizer->eos_token, tokenizer->add_bos_token, - tokenizer->add_eos_token, tokenizer->add_generation_prompt); - if (prompt_result.has_value()) { - (*json_body)["prompt"] = prompt_result.value(); - if (json_body->isMember("stop")) { - bool need_append = true; - for (auto& s : (*json_body)["stop"]) { - if (s.asString() == tokenizer->eos_token) { - need_append = false; - } - } - if (need_append) { - (*json_body)["stop"].append(tokenizer->eos_token); - } - } else { - Json::Value stops(Json::arrayValue); - stops.append(tokenizer->eos_token); - (*json_body)["stop"] = stops; - } - } else { - CTL_ERR("Failed to render prompt: " + prompt_result.error()); - } - } - } - } - CTL_DBG("Json body inference: " + json_body->toStyledString()); auto cb = [q, tool_choice](Json::Value status, Json::Value res) { diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index d9359b698..68f0fe070 100644 --- a/engine/services/model_service.cc 
+++ b/engine/services/model_service.cc @@ -691,21 +691,7 @@ cpp::result ModelService::StartModel( auto status = std::get<0>(ir)["status_code"].asInt(); auto data = std::get<1>(ir); - if (status == drogon::k200OK) { - // start model successfully, in case not vision model, we store the metadata so we can use - // for each inference - if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) { - auto metadata_res = GetModelMetadata(model_handle); - if (metadata_res.has_value()) { - loaded_model_metadata_map_.emplace(model_handle, - std::move(metadata_res.value())); - CTL_INF("Successfully stored metadata for model " << model_handle); - } else { - CTL_WRN("Failed to get metadata for model " << model_handle << ": " - << metadata_res.error()); - } - } - + if (status == drogon::k200OK) { return StartModelResult{/* .success = */ true, /* .warning = */ may_fallback_res.value()}; } else if (status == drogon::k409Conflict) { @@ -760,8 +746,6 @@ cpp::result ModelService::StopModel( if (bypass_check) { bypass_stop_check_set_.erase(model_handle); } - loaded_model_metadata_map_.erase(model_handle); - CTL_INF("Removed metadata for model " << model_handle); return true; } else { CTL_ERR("Model failed to stop with status code: " << status); @@ -1090,14 +1074,6 @@ ModelService::GetModelMetadata(const std::string& model_id) const { return std::move(*model_metadata_res); } -std::shared_ptr ModelService::GetCachedModelMetadata( - const std::string& model_id) const { - if (loaded_model_metadata_map_.find(model_id) == - loaded_model_metadata_map_.end()) - return nullptr; - return loaded_model_metadata_map_.at(model_id); -} - std::string ModelService::GetEngineByModelId( const std::string& model_id) const { namespace fs = std::filesystem; diff --git a/engine/services/model_service.h b/engine/services/model_service.h index beba91f8c..fa247b954 100644 --- a/engine/services/model_service.h +++ b/engine/services/model_service.h @@ -83,9 +83,6 @@ class ModelService { cpp::result, std::string> GetModelMetadata( const std::string& model_id) const; - std::shared_ptr GetCachedModelMetadata( - const std::string& model_id) const; - std::string GetEngineByModelId(const std::string& model_id) const; private: @@ -104,12 +101,6 @@ class ModelService { std::unordered_set bypass_stop_check_set_; std::shared_ptr engine_svc_ = nullptr; - /** - * Store the chat template of loaded model. 
- */ - std::unordered_map> - loaded_model_metadata_map_; - std::mutex es_mtx_; std::unordered_map> es_; cortex::TaskQueue& task_queue_; diff --git a/engine/test/components/test_function_calling.cc b/engine/test/components/test_function_calling.cc deleted file mode 100644 index 7a4810b29..000000000 --- a/engine/test/components/test_function_calling.cc +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include "gtest/gtest.h" -#include "json/json.h" -#include "utils/function_calling/common.h" - -class FunctionCallingUtilsTest : public ::testing::Test { - protected: - std::shared_ptr createTestRequest() { - auto request = std::make_shared(); - (*request)["tools"] = Json::Value(Json::arrayValue); - return request; - } -}; - -TEST_F(FunctionCallingUtilsTest, ReplaceCustomFunctions) { - std::string original = "Test placeholder"; - std::string replacement = "Custom function"; - std::string result = - function_calling_utils::ReplaceCustomFunctions(original, replacement); - EXPECT_EQ(result, "Test Custom function placeholder"); -} - -TEST_F(FunctionCallingUtilsTest, HasTools) { - auto request = createTestRequest(); - EXPECT_FALSE(function_calling_utils::HasTools(request)); - - (*request)["tools"].append(Json::Value()); - EXPECT_TRUE(function_calling_utils::HasTools(request)); - - (*request)["tools"] = "random"; - EXPECT_FALSE(function_calling_utils::HasTools(request)); - - (*request)["tools"] = Json::Value::null; - EXPECT_FALSE(function_calling_utils::HasTools(request)); -} - -TEST_F(FunctionCallingUtilsTest, ProcessTools) { - auto request = createTestRequest(); - Json::Value tool; - tool["type"] = "function"; - tool["function"]["name"] = "test_function"; - tool["function"]["description"] = "Test description"; - (*request)["tools"].append(tool); - - std::string result = function_calling_utils::ProcessTools(request); - EXPECT_TRUE( - result.find("Use the function 'test_function' to: Test description") != - std::string::npos); -} - -TEST_F(FunctionCallingUtilsTest, ParseMultipleFunctionStrings) { - std::string input = - "{\"arg\":\"value1\"}{\"arg\":\"value2\"}"; - Json::Value result = - function_calling_utils::ParseMultipleFunctionStrings(input); - - ASSERT_EQ(result.size(), 2); - EXPECT_EQ(result[0]["function"]["name"].asString(), "func1"); - EXPECT_EQ(result[0]["function"]["arguments"].asString(), - "{\"arg\":\"value1\"}"); - EXPECT_EQ(result[1]["function"]["name"].asString(), "func2"); - EXPECT_EQ(result[1]["function"]["arguments"].asString(), - "{\"arg\":\"value2\"}"); -} - -TEST_F(FunctionCallingUtilsTest, ConvertJsonToFunctionStrings) { - Json::Value jsonArray(Json::arrayValue); - Json::Value function1, function2; - function1["function"]["name"] = "func1"; - function1["function"]["arguments"] = "{\"arg\":\"value1\"}"; - function2["function"]["name"] = "func2"; - function2["function"]["arguments"] = "{\"arg\":\"value2\"}"; - jsonArray.append(function1); - jsonArray.append(function2); - - std::string result = - function_calling_utils::ConvertJsonToFunctionStrings(jsonArray); - EXPECT_EQ(result, - "{\"arg\":\"value1\"}{\"arg\":\"value2\"}"); -} - -TEST_F(FunctionCallingUtilsTest, CreateCustomFunctionsString) { - auto request = createTestRequest(); - Json::Value tool; - tool["type"] = "function"; - tool["function"]["name"] = "test_function"; - tool["function"]["description"] = "Test description"; - (*request)["tools"].append(tool); - - std::string result = - function_calling_utils::CreateCustomFunctionsString(request); - EXPECT_TRUE(result.find("```") != std::string::npos); - EXPECT_TRUE( - 
result.find("Use the function 'test_function' to: Test description") != - std::string::npos); -} - -TEST_F(FunctionCallingUtilsTest, IsValidToolChoiceFormat) { - Json::Value validTool; - validTool["type"] = "function"; - validTool["function"]["name"] = "test_function"; - EXPECT_TRUE(function_calling_utils::IsValidToolChoiceFormat(validTool)); - - Json::Value invalidTool; - EXPECT_FALSE(function_calling_utils::IsValidToolChoiceFormat(invalidTool)); -} - -TEST_F(FunctionCallingUtilsTest, UpdateMessages) { - auto request = createTestRequest(); - std::string system_prompt = "Original prompt"; - (*request)["messages"] = Json::Value(Json::arrayValue); - - function_calling_utils::UpdateMessages(system_prompt, request); - - ASSERT_TRUE((*request)["messages"].isArray()); - EXPECT_EQ((*request)["messages"][0]["role"].asString(), "system"); - EXPECT_EQ((*request)["messages"][0]["content"].asString(), system_prompt); -} - -TEST_F(FunctionCallingUtilsTest, PreprocessRequest) { - auto request = createTestRequest(); - Json::Value tool; - tool["type"] = "function"; - tool["function"]["name"] = "test_function"; - tool["function"]["description"] = "Test description"; - (*request)["tools"].append(tool); - - function_calling_utils::PreprocessRequest(request); - - ASSERT_TRUE((*request)["messages"].isArray()); - EXPECT_TRUE((*request)["messages"][0]["content"].asString().find( - "Test description") != std::string::npos); -} - -TEST_F(FunctionCallingUtilsTest, PostProcessResponse) { - Json::Value response; - response["choices"] = Json::Value(Json::arrayValue); - Json::Value choice; - choice["message"]["content"] = - "{\"arg\":\"value\"}"; - response["choices"].append(choice); - - function_calling_utils::PostProcessResponse(response); - - EXPECT_EQ(response["choices"][0]["message"]["content"].asString(), ""); - EXPECT_TRUE(response["choices"][0]["message"]["tool_calls"].isArray()); - EXPECT_EQ( - response["choices"][0]["message"]["tool_calls"][0]["function"]["name"] - .asString(), - "test_function"); - EXPECT_EQ(response["choices"][0]["message"]["tool_calls"][0]["function"] - ["arguments"] - .asString(), - "{\"arg\":\"value\"}"); -} \ No newline at end of file diff --git a/engine/utils/cli_selection_utils.h b/engine/utils/cli_selection_utils.h index dca6fe675..487c21e6b 100644 --- a/engine/utils/cli_selection_utils.h +++ b/engine/utils/cli_selection_utils.h @@ -27,13 +27,13 @@ inline void PrintMenu( inline std::optional GetNumericValue(const std::string& sval) { try { - return std::stoi(sval); + return std::stoi(sval); } catch (const std::invalid_argument&) { - // Not a valid number - return std::nullopt; + // Not a valid number + return std::nullopt; } catch (const std::out_of_range&) { - // Number out of range - return std::nullopt; + // Number out of range + return std::nullopt; } } @@ -73,14 +73,16 @@ inline std::optional PrintModelSelection( } // Validate if the selection consists solely of numeric characters - if(!std::all_of(selection.begin(), selection.end(), ::isdigit)){ + if (!std::all_of(selection.begin(), selection.end(), ::isdigit)) { return std::nullopt; } // deal with out of range numeric values std::optional numeric_value = GetNumericValue(selection); - - if (!numeric_value.has_value() || (unsigned) numeric_value.value() > availables.size() || numeric_value.value() < 1) { + + if (!numeric_value.has_value() || + (unsigned)numeric_value.value() > availables.size() || + numeric_value.value() < 1) { return std::nullopt; } @@ -101,13 +103,15 @@ inline std::optional PrintSelection( } // Validate if the 
selection consists solely of numeric characters - if(!std::all_of(selection.begin(), selection.end(), ::isdigit)){ + if (!std::all_of(selection.begin(), selection.end(), ::isdigit)) { return std::nullopt; } - + // deal with out of range numeric values std::optional numeric_value = GetNumericValue(selection); - if (!numeric_value.has_value() ||(unsigned) numeric_value.value() > options.size() || numeric_value.value() < 1) { + if (!numeric_value.has_value() || + (unsigned)numeric_value.value() > options.size() || + numeric_value.value() < 1) { return std::nullopt; } diff --git a/engine/utils/function_calling/common.h b/engine/utils/function_calling/common.h index 34a1c9862..953a9964c 100644 --- a/engine/utils/function_calling/common.h +++ b/engine/utils/function_calling/common.h @@ -129,157 +129,4 @@ inline Json::Value ParseJsonString(const std::string& jsonString) { return root; } -inline std::string CreateCustomFunctionsString( - std::shared_ptr request) { - std::string customFunctions = ProcessTools(request); - if (customFunctions.empty()) { - return ""; // No custom functions found - } - - return "```\n" + customFunctions + "```"; -} -inline bool IsValidToolChoiceFormat(const Json::Value& root) { - return root.isObject() && root.isMember("type") && root["type"].isString() && - root["type"].asString() == "function" && root.isMember("function") && - root["function"].isObject() && root["function"].isMember("name") && - root["function"]["name"].isString(); -} -inline void UpdateMessages(std::string& system_prompt, - std::shared_ptr request) { - Json::Value tool_choice = request->get("tool_choice", "auto"); - if (tool_choice.isString() && tool_choice.asString() == "required") { - system_prompt += - "\n\nYou must call a function to answer the user's question."; - } else if (!tool_choice.isString()) { - - system_prompt += - "\n\nNow this is your first priority: You must call the function '" + - tool_choice["function"]["name"].asString() + - "' to answer the user's question."; - } - bool parallel_tool_calls = request->get("parallel_tool_calls", true).asBool(); - if (!parallel_tool_calls) { - system_prompt += "\n\nNow this is your first priority: You must call the only one function at a time."; - } - - bool tools_call_in_user_message = - request->get("tools_call_in_user_message", false).asBool(); - - bool original_stream_config = (*request).get("stream", false).asBool(); - // (*request)["grammar"] = function_calling_utils::gamma_json; - (*request)["stream"] = - false; //when using function calling, disable stream automatically because we need to parse the response to get function name and params - - if (!request->isMember("messages") || !(*request)["messages"].isArray() || - (*request)["messages"].empty()) { - // If no messages, add the system prompt as the first message - Json::Value systemMessage; - systemMessage["role"] = "system"; - systemMessage["content"] = system_prompt; - (*request)["messages"].append(systemMessage); - } else { - - if (tools_call_in_user_message) { - for (Json::Value& message : (*request)["messages"]) { - if (message["role"] == "user" && message.isMember("tools") && - message["tools"].isArray() && message["tools"].size() > 0) { - message["content"] = system_prompt + "\n User question: " + - message["content"].asString(); - } - } - } else { - Json::Value& firstMessage = (*request)["messages"][0]; - if (firstMessage["role"] == "system") { - bool addCustomPrompt = - request->get("add_custom_system_prompt", true).asBool(); - if (addCustomPrompt) { - firstMessage["content"] = 
- system_prompt + "\n" + firstMessage["content"].asString(); - } - } else { - // If the first message is not a system message, prepend the system prompt - Json::Value systemMessage; - systemMessage["role"] = "system"; - systemMessage["content"] = system_prompt; - (*request)["messages"].insert(0, systemMessage); - } - } - - // transform last message role to tool if it is a function call - Json::Value& lastMessage = - (*request)["messages"][(*request)["messages"].size() - 1]; - if (lastMessage.get("role", "") == "tool") { - lastMessage["role"] = function_calling_llama3_1_utils::tool_role; - (*request)["stream"] = - original_stream_config; // if role is tool then should restore stream config to original value - } - } - for (Json::Value& message : (*request)["messages"]) { - if (message["role"] == "assistant" && message.isMember("tool_calls")) { - const Json::Value& tool_calls = message["tool_calls"]; - if (!tool_calls.isNull() && tool_calls.isArray() && - tool_calls.size() > 0) { - message["content"] = ConvertJsonToFunctionStrings(tool_calls); - message["tool_calls"] = {}; - } - } - } -} -inline void PreprocessRequest(std::shared_ptr request) { - if (!function_calling_utils::HasTools(request)) { - return; // Exit if no tools present - } - if (request->get("tool_choice", "auto").isString()) { - std::string tool_choice = request->get("tool_choice", "auto").asString(); - if (tool_choice == "none") { - return; // Exit if tool_choice is none - } - } - std::string customFunctionsString = - function_calling_utils::CreateCustomFunctionsString(request); - std::string new_system_prompt = - function_calling_utils::ReplaceCustomFunctions( - function_calling_llama3_1_utils::system_prompt, - customFunctionsString); - UpdateMessages(new_system_prompt, request); -} - -inline void PostProcessResponse(Json::Value& response) { - if (!response.isMember("choices") || !response["choices"].isArray() || - response["choices"].empty()) { - // If there are no choices or the structure is incorrect, do nothing - return; - } - - // Get a reference to the first choice - Json::Value& firstChoice = response["choices"][0]; - - // Check if the choice has a message with content - if (firstChoice.isMember("message") && - firstChoice["message"].isMember("content")) { - std::string content = firstChoice["message"]["content"].asString(); - - // Create a new structure for tool_calls - Json::Value toolCall = ParseMultipleFunctionStrings(content); - if (toolCall.size() > 0) { - // Add tool_calls to the message - if (response.get("tool_choice", "auto").isString()) { - std::string tool_choice = - response.get("tool_choice", "auto").asString(); - if (tool_choice == "auto") { - firstChoice["finish_reason"] = "tool_calls"; - } else { - firstChoice["finish_reason"] = "stop"; - } - } - - firstChoice["message"]["tool_calls"] = toolCall; - - // Clear the content as it's now represented in tool_calls - firstChoice["message"]["content"] = ""; - } - } - - // Add any additional post-processing logic here -} } // namespace function_calling_utils diff --git a/function-calling.py b/function-calling.py new file mode 100644 index 000000000..32ef31752 --- /dev/null +++ b/function-calling.py @@ -0,0 +1,173 @@ +from datetime import datetime +from openai import OpenAI +from pydantic import BaseModel +import json + +# MODEL = "deepseek-r1-distill-qwen-7b:7b" +MODEL = "llama3.1:8b-q8" + +client = OpenAI( + base_url="http://localhost:39281/v1", + api_key="not-needed", # Authentication is not required for local deployment +) + +tools = [ + { + "type": 
"function", + "function": { + "name": "puppeteer_navigate", + "description": "Navigate to a URL", + "parameters": { + "properties": {"url": {"type": "string"}}, + "required": ["url"], + "type": "object", + }, + "strict": False, + }, + }, + { + "type": "function", + "function": { + "name": "puppeteer_screenshot", + "description": "Take a screenshot of the current page or a specific element", + "parameters": { + "properties": { + "height": { + "description": "Height in pixels (default: 600)", + "type": "number", + }, + "name": { + "description": "Name for the screenshot", + "type": "string", + }, + "selector": { + "description": "CSS selector for element to screenshot", + "type": "string", + }, + "width": { + "description": "Width in pixels (default: 800)", + "type": "number", + }, + }, + "required": ["name"], + "type": "object", + }, + "strict": False, + }, + }, + { + "type": "function", + "function": { + "name": "puppeteer_click", + "description": "Click an element on the page", + "parameters": { + "properties": { + "selector": { + "description": "CSS selector for element to click", + "type": "string", + } + }, + "required": ["selector"], + "type": "object", + }, + "strict": False, + }, + }, + { + "type": "function", + "function": { + "name": "puppeteer_fill", + "description": "Fill out an input field", + "parameters": { + "properties": { + "selector": { + "description": "CSS selector for input field", + "type": "string", + }, + "value": {"description": "Value to fill", "type": "string"}, + }, + "required": ["selector", "value"], + "type": "object", + }, + "strict": False, + }, + }, + { + "type": "function", + "function": { + "name": "puppeteer_select", + "description": "Select an element on the page with Select tag", + "parameters": { + "properties": { + "selector": { + "description": "CSS selector for element to select", + "type": "string", + }, + "value": {"description": "Value to select", "type": "string"}, + }, + "required": ["selector", "value"], + "type": "object", + }, + "strict": False, + }, + }, + { + "type": "function", + "function": { + "name": "puppeteer_hover", + "description": "Hover an element on the page", + "parameters": { + "properties": { + "selector": { + "description": "CSS selector for element to hover", + "type": "string", + } + }, + "required": ["selector"], + "type": "object", + }, + "strict": False, + }, + }, + { + "type": "function", + "function": { + "name": "puppeteer_evaluate", + "description": "Execute JavaScript in the browser console", + "parameters": { + "properties": { + "script": { + "description": "JavaScript code to execute", + "type": "string", + } + }, + "required": ["script"], + "type": "object", + }, + "strict": False, + }, + }, +] + +completion_payload = { + "messages": [ + { + "role": "system", + "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments 
corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.', + }, + { + "role": "user", + "content": "go to google search", + }, + ] +} + +response = client.chat.completions.create( + top_p=0.9, + temperature=0.6, + model=MODEL, + messages=completion_payload["messages"], + tools=tools, +) + +print(response) \ No newline at end of file From f19228771d493f8fb8caee50cc4bfe55238ac306 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Tue, 13 May 2025 13:51:56 +0700 Subject: [PATCH 04/12] fix: remove jinja parameter (#2205) * fix: remove jinja parameter * chore: disable linux arm CI --------- Co-authored-by: sangjanai --- .github/workflows/beta-build.yml | 30 +++++++++---------- .github/workflows/cortex-cpp-quality-gate.yml | 24 +++++++-------- .github/workflows/nightly-build.yml | 28 ++++++++--------- .github/workflows/stable-build.yml | 28 ++++++++--------- .../extensions/local-engine/local_engine.cc | 1 - 5 files changed, 55 insertions(+), 56 deletions(-) diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml index 1d5480312..64d4e28e7 100644 --- a/.github/workflows/beta-build.yml +++ b/.github/workflows/beta-build.yml @@ -81,20 +81,20 @@ jobs: llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} arch: amd64 - build-linux-arm64: - uses: ./.github/workflows/template-build-linux.yml - secrets: inherit - needs: [get-update-version, create-draft-release, get-llamacpp-latest-version] - with: - ref: ${{ github.ref }} - public_provider: github - new_version: ${{ needs.get-update-version.outputs.new_version }} - runs-on: ubuntu-2004-arm64 - cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake" - channel: beta - upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} - arch: arm64 + # build-linux-arm64: + # uses: ./.github/workflows/template-build-linux.yml + # secrets: inherit + # needs: [get-update-version, create-draft-release, get-llamacpp-latest-version] + # with: + # ref: ${{ github.ref }} + # public_provider: github + # new_version: ${{ needs.get-update-version.outputs.new_version }} + # runs-on: ubuntu-2004-arm64 + # cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake" + # channel: beta + # upload_url: ${{ needs.create-draft-release.outputs.upload_url }} + # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} + # arch: arm64 build-docker-x64: uses: ./.github/workflows/template-build-docker-x64.yml @@ -127,7 +127,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} noti-discord: - needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, build-linux-arm64, update_release] + needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, update_release] runs-on: ubuntu-latest permissions: contents: write diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml index 
fc2d52b63..02774d159 100644 --- a/.github/workflows/cortex-cpp-quality-gate.yml +++ b/.github/workflows/cortex-cpp-quality-gate.yml @@ -21,12 +21,12 @@ jobs: fail-fast: false matrix: include: - - os: "linux" - name: "arm64" - runs-on: "ubuntu-2004-arm64" - cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake" - build-deps-cmake-flags: "" - ccache-dir: "" + # - os: "linux" + # name: "arm64" + # runs-on: "ubuntu-2004-arm64" + # cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake" + # build-deps-cmake-flags: "" + # ccache-dir: "" - os: "linux" name: "amd64" runs-on: "ubuntu-20-04-cuda-12-0" @@ -354,12 +354,12 @@ jobs: fail-fast: false matrix: include: - - os: "linux" - name: "arm64" - runs-on: "ubuntu-2004-arm64" - cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake" - build-deps-cmake-flags: "" - ccache-dir: "" + # - os: "linux" + # name: "arm64" + # runs-on: "ubuntu-2004-arm64" + # cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake" + # build-deps-cmake-flags: "" + # ccache-dir: "" - os: "linux" name: "amd64" runs-on: "ubuntu-20-04-cuda-12-0" diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index efdbfdf6f..f013a90e2 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -87,24 +87,24 @@ jobs: llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} arch: amd64 - build-linux-arm64: - uses: ./.github/workflows/template-build-linux.yml - secrets: inherit - needs: [get-update-version, set-public-provider, get-llamacpp-latest-version] - with: - ref: ${{ needs.set-public-provider.outputs.ref }} - public_provider: ${{ needs.set-public-provider.outputs.public_provider }} - new_version: ${{ needs.get-update-version.outputs.new_version }} - runs-on: ubuntu-2004-arm64 - cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake" - channel: nightly - llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} - arch: arm64 + # build-linux-arm64: + # uses: ./.github/workflows/template-build-linux.yml + # secrets: inherit + # needs: [get-update-version, set-public-provider, get-llamacpp-latest-version] + # with: + # ref: ${{ needs.set-public-provider.outputs.ref }} + # public_provider: ${{ needs.set-public-provider.outputs.public_provider }} + # new_version: ${{ needs.get-update-version.outputs.new_version }} + # runs-on: ubuntu-2004-arm64 + # cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake" + # channel: nightly + # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} + # arch: arm64 update-latest-version: runs-on: ubuntu-latest if: needs.set-public-provider.outputs.public_provider == 'aws-s3' - needs: [get-update-version, 
set-public-provider, build-linux-x64, build-linux-arm64, build-macos, build-windows-x64, get-llamacpp-latest-version] + needs: [get-update-version, set-public-provider, build-linux-x64, build-macos, build-windows-x64, get-llamacpp-latest-version] steps: - name: Update latest version id: update-latest-version diff --git a/.github/workflows/stable-build.yml b/.github/workflows/stable-build.yml index c4b5f53f3..27e05f9ce 100644 --- a/.github/workflows/stable-build.yml +++ b/.github/workflows/stable-build.yml @@ -81,20 +81,20 @@ jobs: llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} arch: amd64 - build-linux-arm64: - uses: ./.github/workflows/template-build-linux.yml - secrets: inherit - needs: [get-update-version, create-draft-release, get-llamacpp-latest-version] - with: - ref: ${{ github.ref }} - public_provider: github - new_version: ${{ needs.get-update-version.outputs.new_version }} - runs-on: ubuntu-2004-arm64 - cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake" - channel: stable - upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} - arch: arm64 + # build-linux-arm64: + # uses: ./.github/workflows/template-build-linux.yml + # secrets: inherit + # needs: [get-update-version, create-draft-release, get-llamacpp-latest-version] + # with: + # ref: ${{ github.ref }} + # public_provider: github + # new_version: ${{ needs.get-update-version.outputs.new_version }} + # runs-on: ubuntu-2004-arm64 + # cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake" + # channel: stable + # upload_url: ${{ needs.create-draft-release.outputs.upload_url }} + # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }} + # arch: arm64 build-docker-x64: uses: ./.github/workflows/template-build-docker-x64.yml diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc index c4d296427..b769c5e8c 100644 --- a/engine/extensions/local-engine/local_engine.cc +++ b/engine/extensions/local-engine/local_engine.cc @@ -544,7 +544,6 @@ void LocalEngine::LoadModel(std::shared_ptr json_body, params.push_back("--pooling"); params.push_back("mean"); - params.push_back("--jinja"); std::vector v; v.reserve(params.size() + 1); From a90a5e8687801a7cdcd797eb6f9e8c140df91d76 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Fri, 16 May 2025 07:11:48 +0700 Subject: [PATCH 05/12] fix: check model is loaded before starting (#2206) * fix: check model is loaded before starting * chore: e2e test --------- Co-authored-by: sangjanai --- engine/config/yaml_config.cc | 2 +- .../cli/engines/test_cli_engine_uninstall.py | 5 +++- engine/e2e-test/cli/model/test_cli_model.py | 1 + .../extensions/local-engine/local_engine.cc | 29 ++++++++++++++++--- engine/services/model_source_service.cc | 3 +- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc index 9650ffdcc..38128e1c4 100644 --- a/engine/config/yaml_config.cc +++ b/engine/config/yaml_config.cc @@ -48,7 +48,7 @@ void 
YamlHandler::ReadYamlFile(const std::string& file_path) { if (!yaml_node_["mmproj"]) { auto s = nomalize_path(file_path); auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf"; - CTL_DBG("mmproj: " << abs_path); + CTL_TRC("mmproj: " << abs_path); auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path)); if (std::filesystem::exists(abs_path)) { yaml_node_["mmproj"] = rel_path.string(); diff --git a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py index 8672110e2..3198c81a5 100644 --- a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py +++ b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py @@ -24,7 +24,10 @@ def setup_and_teardown(self): @pytest.mark.asyncio async def test_engines_uninstall_llamacpp_should_be_successfully(self): - response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + data = {"version": "b5371"} + response = requests.post( + "http://localhost:3928/v1/engines/llama-cpp/install", json=data + ) await wait_for_websocket_download_success_event(timeout=None) exit_code, output, error = run( "Uninstall engine", ["engines", "uninstall", "llama-cpp"] diff --git a/engine/e2e-test/cli/model/test_cli_model.py b/engine/e2e-test/cli/model/test_cli_model.py index aa6e99e4a..cd80a9e2b 100644 --- a/engine/e2e-test/cli/model/test_cli_model.py +++ b/engine/e2e-test/cli/model/test_cli_model.py @@ -36,6 +36,7 @@ def setup_and_teardown(self): run("Delete model", ["models", "delete", "tinyllama:1b"]) stop_server() + @pytest.mark.skipif(platform.system() == "Windows", reason="Skip test for Windows") def test_model_pull_with_direct_url_should_be_success(self): exit_code, output, error = run( "Pull model", diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc index b769c5e8c..2bba11a7b 100644 --- a/engine/extensions/local-engine/local_engine.cc +++ b/engine/extensions/local-engine/local_engine.cc @@ -80,6 +80,11 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) { res.push_back("--no-mmap"); } continue; + } else if (member == "ignore_eos") { + if (root[member].asBool()) { + res.push_back("--ignore_eos"); + } + continue; } res.push_back("--" + member); @@ -502,6 +507,23 @@ void LocalEngine::HandleEmbedding(std::shared_ptr json_body, void LocalEngine::LoadModel(std::shared_ptr json_body, http_callback&& callback) { + auto model_id = json_body->get("model", "").asString(); + if (model_id.empty()) { + CTL_WRN("Model is empty"); + } + if (server_map_.find(model_id) != server_map_.end()) { + CTL_INF("Model " << model_id << " is already loaded"); + Json::Value error; + error["error"] = "Model " + model_id + " is already loaded"; + Json::Value status; + status["is_done"] = true; + status["has_error"] = true; + status["is_stream"] = false; + status["status_code"] = 409; + callback(std::move(status), std::move(error)); + return; + } + CTL_INF("Start loading model"); auto wait_for_server_up = [this](const std::string& model, const std::string& host, int port) { @@ -524,10 +546,7 @@ void LocalEngine::LoadModel(std::shared_ptr json_body, }; LOG_DEBUG << "Start to spawn llama-server"; - auto model_id = json_body->get("model", "").asString(); - if (model_id.empty()) { - CTL_WRN("Model is empty"); - } + server_map_[model_id].host = "127.0.0.1"; server_map_[model_id].port = GenerateRandomInteger(39400, 39999); auto& s = server_map_[model_id]; @@ -545,6 +564,8 @@ void LocalEngine::LoadModel(std::shared_ptr json_body, 
params.push_back("--pooling"); params.push_back("mean"); + params.push_back("--jinja"); + std::vector v; v.reserve(params.size() + 1); auto engine_dir = engine_service_.GetEngineDirPath(kLlamaRepo); diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc index b5979667c..661b9b580 100644 --- a/engine/services/model_source_service.cc +++ b/engine/services/model_source_service.cc @@ -433,8 +433,7 @@ cpp::result ModelSourceService::AddCortexsoRepo( auto author = hub_author; auto model_author = hu::GetModelAuthorCortexsoHub(model_name); - if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name); - model_author.has_value() && !model_author.value().empty()) { + if (model_author.has_value() && !model_author.value().empty()) { author = model_author.value(); } From 3a638267c26016d25b53ef936325b0b765487f1f Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Thu, 12 Jun 2025 13:47:26 +0530 Subject: [PATCH 06/12] Hostfix: remove not needed params from load_model (#2209) * refactor: remove --pooling flag from model loading The --pooling flag was removed as the mean pooling functionality not needed in chat models. This fixes the regression * feat(local-engine): add ctx_len parameter support Adds support for the ctx_len parameter by appending --ctx-size with its value. Removed outdated parameter mappings from the kParamsMap to reflect current implementation details and ensure consistency. * feat: add conditional model parameters based on path When the model path contains both "jan" and "nano" (case-insensitive), automatically add speculative decoding parameters to adjust generation behavior. This improves flexibility by enabling environment-specific configurations without manual parameter tuning. Also includes necessary headers for string manipulation and fixes whitespace in ctx_len handling. * chore: remove redundant comment The comment was redundant as the code's purpose is clear without it, improving readability. 
--- .../extensions/local-engine/local_engine.cc | 28 ++++++++++++++---- engine/services/model_service.cc | 29 ++++++++++++------- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc index 2bba11a7b..beda1f44b 100644 --- a/engine/extensions/local-engine/local_engine.cc +++ b/engine/extensions/local-engine/local_engine.cc @@ -1,6 +1,9 @@ #include "local_engine.h" +#include #include +#include #include +#include #include #include "utils/curl_utils.h" #include "utils/json_helper.h" @@ -20,6 +23,7 @@ const std::unordered_set kIgnoredParams = { "user_prompt", "min_keep", "mirostat", "mirostat_eta", "mirostat_tau", "text_model", "version", "n_probs", "object", "penalize_nl", "precision", "size", + "flash_attn", "stop", "tfs_z", "typ_p", "caching_enabled"}; const std::unordered_map kParamsMap = { @@ -42,18 +46,24 @@ int GenerateRandomInteger(int min, int max) { std::uniform_int_distribution<> dis( min, max); // Distribution for the desired range - return dis(gen); // Generate and return a random integer within the range + return dis(gen); } std::vector ConvertJsonToParamsVector(const Json::Value& root) { std::vector res; - std::string errors; for (const auto& member : root.getMemberNames()) { if (member == "model_path" || member == "llama_model_path") { if (!root[member].isNull()) { + const std::string path = root[member].asString(); res.push_back("--model"); - res.push_back(root[member].asString()); + res.push_back(path); + + // If path contains both "Jan" and "nano", case-insensitive, add special params + std::string lowered = path; + std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) { + return std::tolower(c); + }); } continue; } else if (kIgnoredParams.find(member) != kIgnoredParams.end()) { @@ -85,8 +95,15 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) { res.push_back("--ignore_eos"); } continue; + } else if (member == "ctx_len") { + if (!root[member].isNull()) { + res.push_back("--ctx-size"); + res.push_back(root[member].asString()); + } + continue; } + // Generic handling for other members res.push_back("--" + member); if (root[member].isString()) { res.push_back(root[member].asString()); @@ -105,7 +122,7 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) { ss << "\"" << value.asString() << "\""; first = false; } - ss << "] "; + ss << "]"; res.push_back(ss.str()); } } @@ -113,6 +130,7 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) { return res; } + constexpr const auto kMinDataChunkSize = 6u; struct OaiInfo { @@ -561,8 +579,6 @@ void LocalEngine::LoadModel(std::shared_ptr json_body, params.push_back("--port"); params.push_back(std::to_string(s.port)); - params.push_back("--pooling"); - params.push_back("mean"); params.push_back("--jinja"); diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 68f0fe070..2da6c749e 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -165,8 +165,8 @@ ModelService::ModelService(std::shared_ptr db_service, download_service_{download_service}, inference_svc_(inference_service), engine_svc_(engine_svc), - task_queue_(task_queue) { - // ProcessBgrTasks(); + task_queue_(task_queue){ + // ProcessBgrTasks(); }; void ModelService::ForceIndexingModelList() { @@ -557,6 +557,8 @@ cpp::result ModelService::StartModel( if (auto& o = params_override["ctx_len"]; !o.isNull()) { ctx_len = o.asInt(); } + Json::Value 
model_load_params; + json_helper::MergeJson(model_load_params, params_override); try { constexpr const int kDefautlContextLength = 8192; @@ -630,6 +632,8 @@ cpp::result ModelService::StartModel( #else json_data["model_path"] = fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string(); + model_load_params["model_path"] = + fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string(); #endif } else { LOG_WARN << "model_path is empty"; @@ -642,6 +646,8 @@ cpp::result ModelService::StartModel( #else json_data["mmproj"] = fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string(); + model_load_params["model_path"] = + fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string(); #endif } json_data["system_prompt"] = mc.system_template; @@ -655,6 +661,7 @@ cpp::result ModelService::StartModel( } json_data["model"] = model_handle; + model_load_params["model"] = model_handle; if (auto& cpt = custom_prompt_template; !cpt.value_or("").empty()) { auto parse_prompt_result = string_utils::ParsePrompt(cpt.value()); json_data["system_prompt"] = parse_prompt_result.system_prompt; @@ -662,8 +669,6 @@ cpp::result ModelService::StartModel( json_data["ai_prompt"] = parse_prompt_result.ai_prompt; } - json_helper::MergeJson(json_data, params_override); - // Set default cpu_threads if it is not configured if (!json_data.isMember("cpu_threads")) { json_data["cpu_threads"] = GetCpuThreads(); @@ -686,12 +691,12 @@ cpp::result ModelService::StartModel( assert(!!inference_svc_); - auto ir = - inference_svc_->LoadModel(std::make_shared(json_data)); + auto ir = inference_svc_->LoadModel( + std::make_shared(model_load_params)); auto status = std::get<0>(ir)["status_code"].asInt(); auto data = std::get<1>(ir); - if (status == drogon::k200OK) { + if (status == drogon::k200OK) { return StartModelResult{/* .success = */ true, /* .warning = */ may_fallback_res.value()}; } else if (status == drogon::k409Conflict) { @@ -1031,13 +1036,15 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl, auto es = hardware::EstimateLLaMACppRun(model_path, rc); if (!!es && (*es).gpu_mode.vram_MiB > free_vram_MiB && is_cuda) { - CTL_WRN("Not enough VRAM - " << "required: " << (*es).gpu_mode.vram_MiB - << ", available: " << free_vram_MiB); + CTL_WRN("Not enough VRAM - " + << "required: " << (*es).gpu_mode.vram_MiB + << ", available: " << free_vram_MiB); } if (!!es && (*es).cpu_mode.ram_MiB > free_ram_MiB) { - CTL_WRN("Not enough RAM - " << "required: " << (*es).cpu_mode.ram_MiB - << ", available: " << free_ram_MiB); + CTL_WRN("Not enough RAM - " + << "required: " << (*es).cpu_mode.ram_MiB + << ", available: " << free_ram_MiB); } return warning; From 9e87efca30e017c052c74e574bb998c9b0692a7a Mon Sep 17 00:00:00 2001 From: Louis Date: Thu, 12 Jun 2025 19:51:52 +0700 Subject: [PATCH 07/12] fix: do not ignore client request param (#2210) --- engine/extensions/local-engine/local_engine.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc index beda1f44b..adc8649f6 100644 --- a/engine/extensions/local-engine/local_engine.cc +++ b/engine/extensions/local-engine/local_engine.cc @@ -23,7 +23,6 @@ const std::unordered_set kIgnoredParams = { "user_prompt", "min_keep", "mirostat", "mirostat_eta", "mirostat_tau", "text_model", "version", "n_probs", "object", "penalize_nl", "precision", "size", - "flash_attn", "stop", "tfs_z", "typ_p", "caching_enabled"}; const std::unordered_map kParamsMap = { From aab35862051ec45239e69060e4c150374a7d2bf9 
Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Sat, 14 Jun 2025 11:03:14 +0530 Subject: [PATCH 08/12] feat: add reasoning_budget parameter to params map (#2211) --- engine/extensions/local-engine/local_engine.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc index adc8649f6..74bf0d1b8 100644 --- a/engine/extensions/local-engine/local_engine.cc +++ b/engine/extensions/local-engine/local_engine.cc @@ -37,6 +37,7 @@ const std::unordered_map kParamsMap = { {"dynatemp_exponent", "--dynatemp-exp"}, {"ctx_len", "--ctx-size"}, {"ngl", "-ngl"}, + {"reasoning_budget", "--reasoning-budget"}, }; int GenerateRandomInteger(int min, int max) { @@ -50,6 +51,8 @@ int GenerateRandomInteger(int min, int max) { std::vector ConvertJsonToParamsVector(const Json::Value& root) { std::vector res; + std::string errors; + res.push_back("--no-webui"); for (const auto& member : root.getMemberNames()) { if (member == "model_path" || member == "llama_model_path") { From 62b74b7af9f94e23d4ffd10d8a185d0efb8abb12 Mon Sep 17 00:00:00 2001 From: ethanova Date: Sat, 14 Jun 2025 02:29:32 -0400 Subject: [PATCH 09/12] fix bug where for local models, delete only the model passed in, not all local models (#2207) Co-authored-by: Ethan Garber Co-authored-by: Akarshan Biswas --- engine/services/model_service.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 2da6c749e..51e42ff81 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -500,13 +500,10 @@ cpp::result ModelService::DeleteModel( std::filesystem::remove(yaml_fp); CTL_INF("Removed: " << yaml_fp.string()); } else { - // Remove yaml files - for (const auto& entry : - std::filesystem::directory_iterator(yaml_fp.parent_path())) { - if (entry.is_regular_file() && (entry.path().extension() == ".yml")) { - std::filesystem::remove(entry); - CTL_INF("Removed: " << entry.path().string()); - } + // Is a local model - Remove only this model's yaml file + if (std::filesystem::exists(yaml_fp)) { + std::filesystem::remove(yaml_fp); + CTL_INF("Removed: " << yaml_fp.string()); } } From 4cc2166204ef2c3bddb187c9ea3055caca615c9e Mon Sep 17 00:00:00 2001 From: Louis Date: Sun, 15 Jun 2025 16:44:09 +0700 Subject: [PATCH 10/12] fix: model lookup issue on Windows (#2213) --- engine/services/model_service.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 51e42ff81..a3771e0a1 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -626,6 +626,9 @@ cpp::result ModelService::StartModel( #if defined(_WIN32) json_data["model_path"] = cortex::wc::WstringToUtf8( fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring()); + model_load_params["model_path"] = + cortex::wc::WstringToUtf8( + fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring()); #else json_data["model_path"] = fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string(); From cc390f26400ce80732fa81fc6f20ae23a1f88795 Mon Sep 17 00:00:00 2001 From: Service Account Date: Fri, 4 Jul 2025 14:33:36 +0700 Subject: [PATCH 11/12] chore: update readme --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 5cd51ece1..ad4a379de 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ +> ⚠️ **Archived Notice** +> This repository is no longer actively 
maintained.
+> We have migrated development to [menloresearch/llama.cpp](https://github.com/menloresearch/llama.cpp).
+> Please contribute directly to `llama.cpp` moving forward.
+
 # Cortex

From ee9e0ad5b55142959e6fd78ccbb273b993a2ade0 Mon Sep 17 00:00:00 2001
From: Service Account
Date: Fri, 4 Jul 2025 14:37:57 +0700
Subject: [PATCH 12/12] chore: update readme

---
 README.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ad4a379de..f56842d29 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,9 @@
-> ⚠️ **Archived Notice**
-> This repository is no longer actively maintained.
-> We have migrated development to [menloresearch/llama.cpp](https://github.com/menloresearch/llama.cpp).
-> Please contribute directly to `llama.cpp` moving forward.
+<div align="center">
+  <h1>🚨 Archived Repository Notice</h1>
+  <p>This repository is no longer actively maintained.</p>
+  <p>Development has moved to <a href="https://github.com/menloresearch/llama.cpp">menloresearch/llama.cpp</a>.</p>
+  <p>Please contribute directly to <code>llama.cpp</code> moving forward.</p>
+</div>
 # Cortex