diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml
index 1d5480312..64d4e28e7 100644
--- a/.github/workflows/beta-build.yml
+++ b/.github/workflows/beta-build.yml
@@ -81,20 +81,20 @@ jobs:
llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
arch: amd64
- build-linux-arm64:
- uses: ./.github/workflows/template-build-linux.yml
- secrets: inherit
- needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
- with:
- ref: ${{ github.ref }}
- public_provider: github
- new_version: ${{ needs.get-update-version.outputs.new_version }}
- runs-on: ubuntu-2004-arm64
- cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
- channel: beta
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
- arch: arm64
+ # build-linux-arm64:
+ # uses: ./.github/workflows/template-build-linux.yml
+ # secrets: inherit
+ # needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
+ # with:
+ # ref: ${{ github.ref }}
+ # public_provider: github
+ # new_version: ${{ needs.get-update-version.outputs.new_version }}
+ # runs-on: ubuntu-2004-arm64
+ # cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # channel: beta
+ # upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+ # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
+ # arch: arm64
build-docker-x64:
uses: ./.github/workflows/template-build-docker-x64.yml
@@ -127,7 +127,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
noti-discord:
- needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, build-linux-arm64, update_release]
+ needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, update_release]
runs-on: ubuntu-latest
permissions:
contents: write
diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index fc2d52b63..02774d159 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -21,12 +21,12 @@ jobs:
fail-fast: false
matrix:
include:
- - os: "linux"
- name: "arm64"
- runs-on: "ubuntu-2004-arm64"
- cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
- build-deps-cmake-flags: ""
- ccache-dir: ""
+ # - os: "linux"
+ # name: "arm64"
+ # runs-on: "ubuntu-2004-arm64"
+ # cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # build-deps-cmake-flags: ""
+ # ccache-dir: ""
- os: "linux"
name: "amd64"
runs-on: "ubuntu-20-04-cuda-12-0"
@@ -354,12 +354,12 @@ jobs:
fail-fast: false
matrix:
include:
- - os: "linux"
- name: "arm64"
- runs-on: "ubuntu-2004-arm64"
- cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
- build-deps-cmake-flags: ""
- ccache-dir: ""
+ # - os: "linux"
+ # name: "arm64"
+ # runs-on: "ubuntu-2004-arm64"
+ # cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # build-deps-cmake-flags: ""
+ # ccache-dir: ""
- os: "linux"
name: "amd64"
runs-on: "ubuntu-20-04-cuda-12-0"
diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml
index efdbfdf6f..f013a90e2 100644
--- a/.github/workflows/nightly-build.yml
+++ b/.github/workflows/nightly-build.yml
@@ -87,24 +87,24 @@ jobs:
llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
arch: amd64
- build-linux-arm64:
- uses: ./.github/workflows/template-build-linux.yml
- secrets: inherit
- needs: [get-update-version, set-public-provider, get-llamacpp-latest-version]
- with:
- ref: ${{ needs.set-public-provider.outputs.ref }}
- public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
- new_version: ${{ needs.get-update-version.outputs.new_version }}
- runs-on: ubuntu-2004-arm64
- cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
- channel: nightly
- llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
- arch: arm64
+ # build-linux-arm64:
+ # uses: ./.github/workflows/template-build-linux.yml
+ # secrets: inherit
+ # needs: [get-update-version, set-public-provider, get-llamacpp-latest-version]
+ # with:
+ # ref: ${{ needs.set-public-provider.outputs.ref }}
+ # public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
+ # new_version: ${{ needs.get-update-version.outputs.new_version }}
+ # runs-on: ubuntu-2004-arm64
+ # cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # channel: nightly
+ # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
+ # arch: arm64
update-latest-version:
runs-on: ubuntu-latest
if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
- needs: [get-update-version, set-public-provider, build-linux-x64, build-linux-arm64, build-macos, build-windows-x64, get-llamacpp-latest-version]
+ needs: [get-update-version, set-public-provider, build-linux-x64, build-macos, build-windows-x64, get-llamacpp-latest-version]
steps:
- name: Update latest version
id: update-latest-version
diff --git a/.github/workflows/stable-build.yml b/.github/workflows/stable-build.yml
index c4b5f53f3..27e05f9ce 100644
--- a/.github/workflows/stable-build.yml
+++ b/.github/workflows/stable-build.yml
@@ -81,20 +81,20 @@ jobs:
llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
arch: amd64
- build-linux-arm64:
- uses: ./.github/workflows/template-build-linux.yml
- secrets: inherit
- needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
- with:
- ref: ${{ github.ref }}
- public_provider: github
- new_version: ${{ needs.get-update-version.outputs.new_version }}
- runs-on: ubuntu-2004-arm64
- cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
- channel: stable
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
- arch: arm64
+ # build-linux-arm64:
+ # uses: ./.github/workflows/template-build-linux.yml
+ # secrets: inherit
+ # needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
+ # with:
+ # ref: ${{ github.ref }}
+ # public_provider: github
+ # new_version: ${{ needs.get-update-version.outputs.new_version }}
+ # runs-on: ubuntu-2004-arm64
+ # cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # channel: stable
+ # upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+ # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
+ # arch: arm64
build-docker-x64:
uses: ./.github/workflows/template-build-docker-x64.yml
diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml
index 038546097..ea96d2df6 100644
--- a/.github/workflows/template-build-macos.yml
+++ b/.github/workflows/template-build-macos.yml
@@ -253,6 +253,14 @@ jobs:
cd engine
make codesign-binary CODE_SIGN=true DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" DESTINATION_BINARY_NAME="${{ steps.set-output-params.outputs.destination_binary_name }}" DESTINATION_BINARY_SERVER_NAME="${{ steps.set-output-params.outputs.destination_binary_server_name }}"
+ - name: Code Signing binaries for separate binary
+ run: |
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+
- name: Notary macOS Binary
run: |
curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sh -s -- -b /usr/local/bin
@@ -265,6 +273,18 @@ jobs:
QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }}
QUILL_NOTARY_KEY: "/tmp/notary-key.p8"
+ - name: Notary macOS Binary for separate binary
+ run: |
+ # Notarize the binary
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+ env:
+ QUILL_NOTARY_KEY_ID: ${{ secrets.NOTARY_KEY_ID }}
+ QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }}
+ QUILL_NOTARY_KEY: "/tmp/notary-key.p8"
+
- name: Build network Installers
shell: bash
run: |
@@ -310,6 +330,24 @@ jobs:
xcrun notarytool submit ${{ steps.set-output-params.outputs.package_name }}-local.pkg --apple-id ${{ secrets.APPLE_ID }} --password ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} --team-id ${{ secrets.APPLE_TEAM_ID }} --wait
- name: Package
+ run: |
+ mkdir temp
+ # Mac arm64
+ mv cortex-${{ inputs.new_version }}-mac-arm64 temp/cortex
+ cd temp
+ tar -czvf cortex-arm64.tar.gz cortex
+ mv cortex-arm64.tar.gz ../cortex-arm64.tar.gz
+ cd ..
+ rm -rf temp/cortex
+
+ # Mac amd64
+ mv cortex-${{ inputs.new_version }}-mac-amd64 temp/cortex
+ cd temp
+ tar -czvf cortex-amd64.tar.gz cortex
+ mv cortex-amd64.tar.gz ../cortex-amd64.tar.gz
+ cd ..
+
+ - name: Package for separate binary
run: |
cd engine
make package
@@ -320,6 +358,18 @@ jobs:
name: cortex-${{ inputs.new_version }}-mac-universal
path: ./engine/cortex
+ - name: Upload Artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: cortex-${{ inputs.new_version }}-mac-arm64-signed
+ path: ./cortex-${{ inputs.new_version }}-mac-arm64
+
+ - name: Upload Artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: cortex-${{ inputs.new_version }}-mac-amd64-signed
+ path: ./cortex-${{ inputs.new_version }}-mac-amd64
+
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
@@ -358,6 +408,28 @@ jobs:
asset_name: cortex-${{ inputs.new_version }}-mac-universal.tar.gz
asset_content_type: application/zip
+ - name: Upload release assert if public provider is github
+ if: inputs.public_provider == 'github'
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ uses: actions/upload-release-asset@v1.0.1
+ with:
+ upload_url: ${{ inputs.upload_url }}
+ asset_path: ./cortex-arm64.tar.gz
+ asset_name: cortex-${{ inputs.new_version }}-mac-arm64.tar.gz
+ asset_content_type: application/zip
+
+ - name: Upload release assert if public provider is github
+ if: inputs.public_provider == 'github'
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ uses: actions/upload-release-asset@v1.0.1
+ with:
+ upload_url: ${{ inputs.upload_url }}
+ asset_path: ./cortex-amd64.tar.gz
+ asset_name: cortex-${{ inputs.new_version }}-mac-amd64.tar.gz
+ asset_content_type: application/zip
+
- name: Upload release assert if public provider is github
if: inputs.public_provider == 'github'
env:
diff --git a/README.md b/README.md
index 5cd51ece1..f56842d29 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,10 @@
+
+🚨 Archived Repository Notice
+
+This repository is no longer actively maintained.
+Development has moved to menloresearch/llama.cpp.
+Please contribute directly to llama.cpp moving forward.
+
# Cortex
diff --git a/docs/docs/guides/function-calling.md b/docs/docs/guides/function-calling.md
index 6b9157f18..7725f225d 100644
--- a/docs/docs/guides/function-calling.md
+++ b/docs/docs/guides/function-calling.md
@@ -63,8 +63,14 @@ tools = [
completion_payload = {
"messages": [
- {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."},
- {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"},
+ {
+ "role": "system",
+ "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+ },
+ {
+ "role": "user",
+ "content": "Hi, can you tell me the delivery date for my order?"
+ },
]
}
@@ -126,10 +132,22 @@ Once the user provides their order ID:
```python
completion_payload = {
"messages": [
- {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."},
- {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"},
- {"role": "assistant", "content": "Of course! Please provide your order ID so I can look it up."},
- {"role": "user", "content": "i think it is order_70705"},
+ {
+ "role": "system",
+ "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+ },
+ {
+ "role": "user",
+ "content": "Hi, can you tell me the delivery date for my order?"
+ },
+ {
+ "role": "assistant",
+ "content": "Of course! Please provide your order ID so I can look it up."
+ },
+ {
+ "role": "user",
+ "content": "i think it is order_70705"
+ },
]
}
diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
index 9650ffdcc..38128e1c4 100644
--- a/engine/config/yaml_config.cc
+++ b/engine/config/yaml_config.cc
@@ -48,7 +48,7 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
if (!yaml_node_["mmproj"]) {
auto s = nomalize_path(file_path);
auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
- CTL_DBG("mmproj: " << abs_path);
+ CTL_TRC("mmproj: " << abs_path);
auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
if (std::filesystem::exists(abs_path)) {
yaml_node_["mmproj"] = rel_path.string();
diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc
index 6ea733a70..3ba4aa327 100644
--- a/engine/controllers/server.cc
+++ b/engine/controllers/server.cc
@@ -179,7 +179,6 @@ void server::ProcessStreamRes(std::function cb,
void server::ProcessNonStreamRes(std::function cb,
SyncQueue& q) {
auto [status, res] = q.wait_and_pop();
- function_calling_utils::PostProcessResponse(res);
LOG_DEBUG << "response: " << res.toStyledString();
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
resp->setStatusCode(
diff --git a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
index 8672110e2..3198c81a5 100644
--- a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
@@ -24,7 +24,10 @@ def setup_and_teardown(self):
@pytest.mark.asyncio
async def test_engines_uninstall_llamacpp_should_be_successfully(self):
- response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+ data = {"version": "b5371"}
+ response = requests.post(
+ "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+ )
await wait_for_websocket_download_success_event(timeout=None)
exit_code, output, error = run(
"Uninstall engine", ["engines", "uninstall", "llama-cpp"]
diff --git a/engine/e2e-test/cli/model/test_cli_model.py b/engine/e2e-test/cli/model/test_cli_model.py
index aa6e99e4a..cd80a9e2b 100644
--- a/engine/e2e-test/cli/model/test_cli_model.py
+++ b/engine/e2e-test/cli/model/test_cli_model.py
@@ -36,6 +36,7 @@ def setup_and_teardown(self):
run("Delete model", ["models", "delete", "tinyllama:1b"])
stop_server()
+ @pytest.mark.skipif(platform.system() == "Windows", reason="Skip test for Windows")
def test_model_pull_with_direct_url_should_be_success(self):
exit_code, output, error = run(
"Pull model",
diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc
index 885c14d77..74bf0d1b8 100644
--- a/engine/extensions/local-engine/local_engine.cc
+++ b/engine/extensions/local-engine/local_engine.cc
@@ -1,6 +1,9 @@
#include "local_engine.h"
+#include
#include
+#include
#include
+#include
#include
#include "utils/curl_utils.h"
#include "utils/json_helper.h"
@@ -20,7 +23,7 @@ const std::unordered_set kIgnoredParams = {
"user_prompt", "min_keep", "mirostat", "mirostat_eta",
"mirostat_tau", "text_model", "version", "n_probs",
"object", "penalize_nl", "precision", "size",
- "stop", "tfs_z", "typ_p"};
+ "stop", "tfs_z", "typ_p", "caching_enabled"};
const std::unordered_map kParamsMap = {
{"cpu_threads", "--threads"},
@@ -34,6 +37,7 @@ const std::unordered_map kParamsMap = {
{"dynatemp_exponent", "--dynatemp-exp"},
{"ctx_len", "--ctx-size"},
{"ngl", "-ngl"},
+ {"reasoning_budget", "--reasoning-budget"},
};
int GenerateRandomInteger(int min, int max) {
@@ -42,18 +46,26 @@ int GenerateRandomInteger(int min, int max) {
std::uniform_int_distribution<> dis(
min, max); // Distribution for the desired range
- return dis(gen); // Generate and return a random integer within the range
+ return dis(gen);
}
std::vector ConvertJsonToParamsVector(const Json::Value& root) {
std::vector res;
std::string errors;
+ res.push_back("--no-webui");
for (const auto& member : root.getMemberNames()) {
if (member == "model_path" || member == "llama_model_path") {
if (!root[member].isNull()) {
+ const std::string path = root[member].asString();
res.push_back("--model");
- res.push_back(root[member].asString());
+ res.push_back(path);
+
+ // If path contains both "Jan" and "nano", case-insensitive, add special params
+ std::string lowered = path;
+ std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) {
+ return std::tolower(c);
+ });
}
continue;
} else if (kIgnoredParams.find(member) != kIgnoredParams.end()) {
@@ -67,8 +79,33 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) {
res.push_back("--embedding");
}
continue;
+ } else if (member == "cache_type") {
+ if (!root[member].isNull()) {
+ res.push_back("-ctk");
+ res.push_back(root[member].asString());
+ res.push_back("-ctv");
+ res.push_back(root[member].asString());
+ }
+ continue;
+ } else if (member == "use_mmap") {
+ if (!root[member].asBool()) {
+ res.push_back("--no-mmap");
+ }
+ continue;
+ } else if (member == "ignore_eos") {
+ if (root[member].asBool()) {
+ res.push_back("--ignore_eos");
+ }
+ continue;
+ } else if (member == "ctx_len") {
+ if (!root[member].isNull()) {
+ res.push_back("--ctx-size");
+ res.push_back(root[member].asString());
+ }
+ continue;
}
+ // Generic handling for other members
res.push_back("--" + member);
if (root[member].isString()) {
res.push_back(root[member].asString());
@@ -87,7 +124,7 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) {
ss << "\"" << value.asString() << "\"";
first = false;
}
- ss << "] ";
+ ss << "]";
res.push_back(ss.str());
}
}
@@ -95,6 +132,7 @@ std::vector ConvertJsonToParamsVector(const Json::Value& root) {
return res;
}
+
constexpr const auto kMinDataChunkSize = 6u;
struct OaiInfo {
@@ -489,6 +527,23 @@ void LocalEngine::HandleEmbedding(std::shared_ptr json_body,
void LocalEngine::LoadModel(std::shared_ptr json_body,
http_callback&& callback) {
+ auto model_id = json_body->get("model", "").asString();
+ if (model_id.empty()) {
+ CTL_WRN("Model is empty");
+ }
+ if (server_map_.find(model_id) != server_map_.end()) {
+ CTL_INF("Model " << model_id << " is already loaded");
+ Json::Value error;
+ error["error"] = "Model " + model_id + " is already loaded";
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 409;
+ callback(std::move(status), std::move(error));
+ return;
+ }
+
CTL_INF("Start loading model");
auto wait_for_server_up = [this](const std::string& model,
const std::string& host, int port) {
@@ -511,10 +566,7 @@ void LocalEngine::LoadModel(std::shared_ptr json_body,
};
LOG_DEBUG << "Start to spawn llama-server";
- auto model_id = json_body->get("model", "").asString();
- if (model_id.empty()) {
- CTL_WRN("Model is empty");
- }
+
server_map_[model_id].host = "127.0.0.1";
server_map_[model_id].port = GenerateRandomInteger(39400, 39999);
auto& s = server_map_[model_id];
@@ -529,8 +581,8 @@ void LocalEngine::LoadModel(std::shared_ptr json_body,
params.push_back("--port");
params.push_back(std::to_string(s.port));
- params.push_back("--pooling");
- params.push_back("mean");
+
+ params.push_back("--jinja");
std::vector v;
v.reserve(params.size() + 1);
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc
index 89cd00058..15c7148c7 100644
--- a/engine/services/engine_service.cc
+++ b/engine/services/engine_service.cc
@@ -772,7 +772,13 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const {
// try to find version.txt
auto version_txt_path = version_entry.path() / "version.txt";
if (!std::filesystem::exists(version_txt_path)) {
- continue;
+ // create new one
+ std::ofstream meta(version_txt_path, std::ios::out);
+ meta << "name: " << entry.path().filename() << std::endl;
+ meta << "version: " << version_entry.path().filename() << std::endl;
+ meta.close();
+ CTL_INF("name: " << entry.path().filename().string() << ", version: "
+ << version_entry.path().filename().string());
}
try {
@@ -865,7 +871,9 @@ void EngineService::RegisterEngineLibPath() {
// register deps
std::vector paths{};
- paths.push_back(cuda_path);
+ if (std::filesystem::exists(cuda_path)) {
+ paths.push_back(cuda_path);
+ }
paths.push_back(engine_dir_path);
CTL_DBG("Registering dylib for "
diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc
index 75d95f06d..e07ed71ba 100644
--- a/engine/services/inference_service.cc
+++ b/engine/services/inference_service.cc
@@ -13,8 +13,6 @@ cpp::result InferenceService::HandleChatCompletion(
engine_type = (*(json_body)).get("engine", kLlamaRepo).asString();
}
CTL_DBG("engine_type: " << engine_type);
- function_calling_utils::PreprocessRequest(json_body);
- CTL_DBG("engine_type: " << engine_type);
auto tool_choice = json_body->get("tool_choice", Json::Value::null);
auto model_id = json_body->get("model", "").asString();
if (saved_models_.find(model_id) != saved_models_.end()) {
@@ -46,51 +44,6 @@ cpp::result InferenceService::HandleChatCompletion(
return cpp::fail(std::make_pair(stt, res));
}
- if (!model_id.empty()) {
- if (auto model_service = model_service_.lock()) {
- auto metadata_ptr = model_service->GetCachedModelMetadata(model_id);
- if (metadata_ptr != nullptr &&
- !metadata_ptr->tokenizer->chat_template.empty()) {
- auto tokenizer = metadata_ptr->tokenizer;
- auto messages = (*json_body)["messages"];
- Json::Value messages_jsoncpp(Json::arrayValue);
- for (auto message : messages) {
- messages_jsoncpp.append(message);
- }
-
- Json::Value tools(Json::arrayValue);
- Json::Value template_data_json;
- template_data_json["messages"] = messages_jsoncpp;
- // template_data_json["tools"] = tools;
-
- auto prompt_result = jinja::RenderTemplate(
- tokenizer->chat_template, template_data_json, tokenizer->bos_token,
- tokenizer->eos_token, tokenizer->add_bos_token,
- tokenizer->add_eos_token, tokenizer->add_generation_prompt);
- if (prompt_result.has_value()) {
- (*json_body)["prompt"] = prompt_result.value();
- if (json_body->isMember("stop")) {
- bool need_append = true;
- for (auto& s : (*json_body)["stop"]) {
- if (s.asString() == tokenizer->eos_token) {
- need_append = false;
- }
- }
- if (need_append) {
- (*json_body)["stop"].append(tokenizer->eos_token);
- }
- } else {
- Json::Value stops(Json::arrayValue);
- stops.append(tokenizer->eos_token);
- (*json_body)["stop"] = stops;
- }
- } else {
- CTL_ERR("Failed to render prompt: " + prompt_result.error());
- }
- }
- }
- }
-
CTL_DBG("Json body inference: " + json_body->toStyledString());
auto cb = [q, tool_choice](Json::Value status, Json::Value res) {
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index d9359b698..a3771e0a1 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -165,8 +165,8 @@ ModelService::ModelService(std::shared_ptr db_service,
download_service_{download_service},
inference_svc_(inference_service),
engine_svc_(engine_svc),
- task_queue_(task_queue) {
- // ProcessBgrTasks();
+ task_queue_(task_queue){
+ // ProcessBgrTasks();
};
void ModelService::ForceIndexingModelList() {
@@ -500,13 +500,10 @@ cpp::result ModelService::DeleteModel(
std::filesystem::remove(yaml_fp);
CTL_INF("Removed: " << yaml_fp.string());
} else {
- // Remove yaml files
- for (const auto& entry :
- std::filesystem::directory_iterator(yaml_fp.parent_path())) {
- if (entry.is_regular_file() && (entry.path().extension() == ".yml")) {
- std::filesystem::remove(entry);
- CTL_INF("Removed: " << entry.path().string());
- }
+ // Is a local model - Remove only this model's yaml file
+ if (std::filesystem::exists(yaml_fp)) {
+ std::filesystem::remove(yaml_fp);
+ CTL_INF("Removed: " << yaml_fp.string());
}
}
@@ -557,6 +554,8 @@ cpp::result ModelService::StartModel(
if (auto& o = params_override["ctx_len"]; !o.isNull()) {
ctx_len = o.asInt();
}
+ Json::Value model_load_params;
+ json_helper::MergeJson(model_load_params, params_override);
try {
constexpr const int kDefautlContextLength = 8192;
@@ -627,9 +626,14 @@ cpp::result ModelService::StartModel(
#if defined(_WIN32)
json_data["model_path"] = cortex::wc::WstringToUtf8(
fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
+ model_load_params["model_path"] =
+ cortex::wc::WstringToUtf8(
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
#else
json_data["model_path"] =
fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
+ model_load_params["model_path"] =
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
#endif
} else {
LOG_WARN << "model_path is empty";
@@ -642,6 +646,8 @@ cpp::result ModelService::StartModel(
#else
json_data["mmproj"] =
fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
+ model_load_params["model_path"] =
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
#endif
}
json_data["system_prompt"] = mc.system_template;
@@ -655,6 +661,7 @@ cpp::result ModelService::StartModel(
}
json_data["model"] = model_handle;
+ model_load_params["model"] = model_handle;
if (auto& cpt = custom_prompt_template; !cpt.value_or("").empty()) {
auto parse_prompt_result = string_utils::ParsePrompt(cpt.value());
json_data["system_prompt"] = parse_prompt_result.system_prompt;
@@ -662,8 +669,6 @@ cpp::result ModelService::StartModel(
json_data["ai_prompt"] = parse_prompt_result.ai_prompt;
}
- json_helper::MergeJson(json_data, params_override);
-
// Set default cpu_threads if it is not configured
if (!json_data.isMember("cpu_threads")) {
json_data["cpu_threads"] = GetCpuThreads();
@@ -686,26 +691,12 @@ cpp::result ModelService::StartModel(
assert(!!inference_svc_);
- auto ir =
- inference_svc_->LoadModel(std::make_shared(json_data));
+ auto ir = inference_svc_->LoadModel(
+ std::make_shared(model_load_params));
auto status = std::get<0>(ir)["status_code"].asInt();
auto data = std::get<1>(ir);
if (status == drogon::k200OK) {
- // start model successfully, in case not vision model, we store the metadata so we can use
- // for each inference
- if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) {
- auto metadata_res = GetModelMetadata(model_handle);
- if (metadata_res.has_value()) {
- loaded_model_metadata_map_.emplace(model_handle,
- std::move(metadata_res.value()));
- CTL_INF("Successfully stored metadata for model " << model_handle);
- } else {
- CTL_WRN("Failed to get metadata for model " << model_handle << ": "
- << metadata_res.error());
- }
- }
-
return StartModelResult{/* .success = */ true,
/* .warning = */ may_fallback_res.value()};
} else if (status == drogon::k409Conflict) {
@@ -760,8 +751,6 @@ cpp::result ModelService::StopModel(
if (bypass_check) {
bypass_stop_check_set_.erase(model_handle);
}
- loaded_model_metadata_map_.erase(model_handle);
- CTL_INF("Removed metadata for model " << model_handle);
return true;
} else {
CTL_ERR("Model failed to stop with status code: " << status);
@@ -1047,13 +1036,15 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
auto es = hardware::EstimateLLaMACppRun(model_path, rc);
if (!!es && (*es).gpu_mode.vram_MiB > free_vram_MiB && is_cuda) {
- CTL_WRN("Not enough VRAM - " << "required: " << (*es).gpu_mode.vram_MiB
- << ", available: " << free_vram_MiB);
+ CTL_WRN("Not enough VRAM - "
+ << "required: " << (*es).gpu_mode.vram_MiB
+ << ", available: " << free_vram_MiB);
}
if (!!es && (*es).cpu_mode.ram_MiB > free_ram_MiB) {
- CTL_WRN("Not enough RAM - " << "required: " << (*es).cpu_mode.ram_MiB
- << ", available: " << free_ram_MiB);
+ CTL_WRN("Not enough RAM - "
+ << "required: " << (*es).cpu_mode.ram_MiB
+ << ", available: " << free_ram_MiB);
}
return warning;
@@ -1090,14 +1081,6 @@ ModelService::GetModelMetadata(const std::string& model_id) const {
return std::move(*model_metadata_res);
}
-std::shared_ptr ModelService::GetCachedModelMetadata(
- const std::string& model_id) const {
- if (loaded_model_metadata_map_.find(model_id) ==
- loaded_model_metadata_map_.end())
- return nullptr;
- return loaded_model_metadata_map_.at(model_id);
-}
-
std::string ModelService::GetEngineByModelId(
const std::string& model_id) const {
namespace fs = std::filesystem;
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index beba91f8c..fa247b954 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -83,9 +83,6 @@ class ModelService {
cpp::result, std::string> GetModelMetadata(
const std::string& model_id) const;
- std::shared_ptr GetCachedModelMetadata(
- const std::string& model_id) const;
-
std::string GetEngineByModelId(const std::string& model_id) const;
private:
@@ -104,12 +101,6 @@ class ModelService {
std::unordered_set bypass_stop_check_set_;
std::shared_ptr engine_svc_ = nullptr;
- /**
- * Store the chat template of loaded model.
- */
- std::unordered_map>
- loaded_model_metadata_map_;
-
std::mutex es_mtx_;
std::unordered_map> es_;
cortex::TaskQueue& task_queue_;
diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc
index b5979667c..661b9b580 100644
--- a/engine/services/model_source_service.cc
+++ b/engine/services/model_source_service.cc
@@ -433,8 +433,7 @@ cpp::result ModelSourceService::AddCortexsoRepo(
auto author = hub_author;
auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
- if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
- model_author.has_value() && !model_author.value().empty()) {
+ if (model_author.has_value() && !model_author.value().empty()) {
author = model_author.value();
}
diff --git a/engine/test/components/test_function_calling.cc b/engine/test/components/test_function_calling.cc
deleted file mode 100644
index 7a4810b29..000000000
--- a/engine/test/components/test_function_calling.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-#include
-#include "gtest/gtest.h"
-#include "json/json.h"
-#include "utils/function_calling/common.h"
-
-class FunctionCallingUtilsTest : public ::testing::Test {
- protected:
- std::shared_ptr createTestRequest() {
- auto request = std::make_shared();
- (*request)["tools"] = Json::Value(Json::arrayValue);
- return request;
- }
-};
-
-TEST_F(FunctionCallingUtilsTest, ReplaceCustomFunctions) {
- std::string original = "Test placeholder";
- std::string replacement = "Custom function";
- std::string result =
- function_calling_utils::ReplaceCustomFunctions(original, replacement);
- EXPECT_EQ(result, "Test Custom function placeholder");
-}
-
-TEST_F(FunctionCallingUtilsTest, HasTools) {
- auto request = createTestRequest();
- EXPECT_FALSE(function_calling_utils::HasTools(request));
-
- (*request)["tools"].append(Json::Value());
- EXPECT_TRUE(function_calling_utils::HasTools(request));
-
- (*request)["tools"] = "random";
- EXPECT_FALSE(function_calling_utils::HasTools(request));
-
- (*request)["tools"] = Json::Value::null;
- EXPECT_FALSE(function_calling_utils::HasTools(request));
-}
-
-TEST_F(FunctionCallingUtilsTest, ProcessTools) {
- auto request = createTestRequest();
- Json::Value tool;
- tool["type"] = "function";
- tool["function"]["name"] = "test_function";
- tool["function"]["description"] = "Test description";
- (*request)["tools"].append(tool);
-
- std::string result = function_calling_utils::ProcessTools(request);
- EXPECT_TRUE(
- result.find("Use the function 'test_function' to: Test description") !=
- std::string::npos);
-}
-
-TEST_F(FunctionCallingUtilsTest, ParseMultipleFunctionStrings) {
- std::string input =
- "{\"arg\":\"value1\"}"
- "function>{\"arg\":\"value2\"}";
- Json::Value result =
- function_calling_utils::ParseMultipleFunctionStrings(input);
-
- ASSERT_EQ(result.size(), 2);
- EXPECT_EQ(result[0]["function"]["name"].asString(), "func1");
- EXPECT_EQ(result[0]["function"]["arguments"].asString(),
- "{\"arg\":\"value1\"}");
- EXPECT_EQ(result[1]["function"]["name"].asString(), "func2");
- EXPECT_EQ(result[1]["function"]["arguments"].asString(),
- "{\"arg\":\"value2\"}");
-}
-
-TEST_F(FunctionCallingUtilsTest, ConvertJsonToFunctionStrings) {
- Json::Value jsonArray(Json::arrayValue);
- Json::Value function1, function2;
- function1["function"]["name"] = "func1";
- function1["function"]["arguments"] = "{\"arg\":\"value1\"}";
- function2["function"]["name"] = "func2";
- function2["function"]["arguments"] = "{\"arg\":\"value2\"}";
- jsonArray.append(function1);
- jsonArray.append(function2);
-
- std::string result =
- function_calling_utils::ConvertJsonToFunctionStrings(jsonArray);
- EXPECT_EQ(result,
- "{\"arg\":\"value1\"}"
- "function>{\"arg\":\"value2\"}");
-}
-
-TEST_F(FunctionCallingUtilsTest, CreateCustomFunctionsString) {
- auto request = createTestRequest();
- Json::Value tool;
- tool["type"] = "function";
- tool["function"]["name"] = "test_function";
- tool["function"]["description"] = "Test description";
- (*request)["tools"].append(tool);
-
- std::string result =
- function_calling_utils::CreateCustomFunctionsString(request);
- EXPECT_TRUE(result.find("```") != std::string::npos);
- EXPECT_TRUE(
- result.find("Use the function 'test_function' to: Test description") !=
- std::string::npos);
-}
-
-TEST_F(FunctionCallingUtilsTest, IsValidToolChoiceFormat) {
- Json::Value validTool;
- validTool["type"] = "function";
- validTool["function"]["name"] = "test_function";
- EXPECT_TRUE(function_calling_utils::IsValidToolChoiceFormat(validTool));
-
- Json::Value invalidTool;
- EXPECT_FALSE(function_calling_utils::IsValidToolChoiceFormat(invalidTool));
-}
-
-TEST_F(FunctionCallingUtilsTest, UpdateMessages) {
- auto request = createTestRequest();
- std::string system_prompt = "Original prompt";
- (*request)["messages"] = Json::Value(Json::arrayValue);
-
- function_calling_utils::UpdateMessages(system_prompt, request);
-
- ASSERT_TRUE((*request)["messages"].isArray());
- EXPECT_EQ((*request)["messages"][0]["role"].asString(), "system");
- EXPECT_EQ((*request)["messages"][0]["content"].asString(), system_prompt);
-}
-
-TEST_F(FunctionCallingUtilsTest, PreprocessRequest) {
- auto request = createTestRequest();
- Json::Value tool;
- tool["type"] = "function";
- tool["function"]["name"] = "test_function";
- tool["function"]["description"] = "Test description";
- (*request)["tools"].append(tool);
-
- function_calling_utils::PreprocessRequest(request);
-
- ASSERT_TRUE((*request)["messages"].isArray());
- EXPECT_TRUE((*request)["messages"][0]["content"].asString().find(
- "Test description") != std::string::npos);
-}
-
-TEST_F(FunctionCallingUtilsTest, PostProcessResponse) {
- Json::Value response;
- response["choices"] = Json::Value(Json::arrayValue);
- Json::Value choice;
- choice["message"]["content"] =
- "{\"arg\":\"value\"}";
- response["choices"].append(choice);
-
- function_calling_utils::PostProcessResponse(response);
-
- EXPECT_EQ(response["choices"][0]["message"]["content"].asString(), "");
- EXPECT_TRUE(response["choices"][0]["message"]["tool_calls"].isArray());
- EXPECT_EQ(
- response["choices"][0]["message"]["tool_calls"][0]["function"]["name"]
- .asString(),
- "test_function");
- EXPECT_EQ(response["choices"][0]["message"]["tool_calls"][0]["function"]
- ["arguments"]
- .asString(),
- "{\"arg\":\"value\"}");
-}
\ No newline at end of file
diff --git a/engine/utils/cli_selection_utils.h b/engine/utils/cli_selection_utils.h
index dca6fe675..487c21e6b 100644
--- a/engine/utils/cli_selection_utils.h
+++ b/engine/utils/cli_selection_utils.h
@@ -27,13 +27,13 @@ inline void PrintMenu(
inline std::optional GetNumericValue(const std::string& sval) {
try {
- return std::stoi(sval);
+ return std::stoi(sval);
} catch (const std::invalid_argument&) {
- // Not a valid number
- return std::nullopt;
+ // Not a valid number
+ return std::nullopt;
} catch (const std::out_of_range&) {
- // Number out of range
- return std::nullopt;
+ // Number out of range
+ return std::nullopt;
}
}
@@ -73,14 +73,16 @@ inline std::optional PrintModelSelection(
}
// Validate if the selection consists solely of numeric characters
- if(!std::all_of(selection.begin(), selection.end(), ::isdigit)){
+ if (!std::all_of(selection.begin(), selection.end(), ::isdigit)) {
return std::nullopt;
}
// deal with out of range numeric values
std::optional numeric_value = GetNumericValue(selection);
-
- if (!numeric_value.has_value() || (unsigned) numeric_value.value() > availables.size() || numeric_value.value() < 1) {
+
+ if (!numeric_value.has_value() ||
+ (unsigned)numeric_value.value() > availables.size() ||
+ numeric_value.value() < 1) {
return std::nullopt;
}
@@ -101,13 +103,15 @@ inline std::optional PrintSelection(
}
// Validate if the selection consists solely of numeric characters
- if(!std::all_of(selection.begin(), selection.end(), ::isdigit)){
+ if (!std::all_of(selection.begin(), selection.end(), ::isdigit)) {
return std::nullopt;
}
-
+
// deal with out of range numeric values
std::optional numeric_value = GetNumericValue(selection);
- if (!numeric_value.has_value() ||(unsigned) numeric_value.value() > options.size() || numeric_value.value() < 1) {
+ if (!numeric_value.has_value() ||
+ (unsigned)numeric_value.value() > options.size() ||
+ numeric_value.value() < 1) {
return std::nullopt;
}
diff --git a/engine/utils/function_calling/common.h b/engine/utils/function_calling/common.h
index 34a1c9862..953a9964c 100644
--- a/engine/utils/function_calling/common.h
+++ b/engine/utils/function_calling/common.h
@@ -129,157 +129,4 @@ inline Json::Value ParseJsonString(const std::string& jsonString) {
return root;
}
-inline std::string CreateCustomFunctionsString(
- std::shared_ptr request) {
- std::string customFunctions = ProcessTools(request);
- if (customFunctions.empty()) {
- return ""; // No custom functions found
- }
-
- return "```\n" + customFunctions + "```";
-}
-inline bool IsValidToolChoiceFormat(const Json::Value& root) {
- return root.isObject() && root.isMember("type") && root["type"].isString() &&
- root["type"].asString() == "function" && root.isMember("function") &&
- root["function"].isObject() && root["function"].isMember("name") &&
- root["function"]["name"].isString();
-}
-inline void UpdateMessages(std::string& system_prompt,
- std::shared_ptr request) {
- Json::Value tool_choice = request->get("tool_choice", "auto");
- if (tool_choice.isString() && tool_choice.asString() == "required") {
- system_prompt +=
- "\n\nYou must call a function to answer the user's question.";
- } else if (!tool_choice.isString()) {
-
- system_prompt +=
- "\n\nNow this is your first priority: You must call the function '" +
- tool_choice["function"]["name"].asString() +
- "' to answer the user's question.";
- }
- bool parallel_tool_calls = request->get("parallel_tool_calls", true).asBool();
- if (!parallel_tool_calls) {
- system_prompt += "\n\nNow this is your first priority: You must call the only one function at a time.";
- }
-
- bool tools_call_in_user_message =
- request->get("tools_call_in_user_message", false).asBool();
-
- bool original_stream_config = (*request).get("stream", false).asBool();
- // (*request)["grammar"] = function_calling_utils::gamma_json;
- (*request)["stream"] =
- false; //when using function calling, disable stream automatically because we need to parse the response to get function name and params
-
- if (!request->isMember("messages") || !(*request)["messages"].isArray() ||
- (*request)["messages"].empty()) {
- // If no messages, add the system prompt as the first message
- Json::Value systemMessage;
- systemMessage["role"] = "system";
- systemMessage["content"] = system_prompt;
- (*request)["messages"].append(systemMessage);
- } else {
-
- if (tools_call_in_user_message) {
- for (Json::Value& message : (*request)["messages"]) {
- if (message["role"] == "user" && message.isMember("tools") &&
- message["tools"].isArray() && message["tools"].size() > 0) {
- message["content"] = system_prompt + "\n User question: " +
- message["content"].asString();
- }
- }
- } else {
- Json::Value& firstMessage = (*request)["messages"][0];
- if (firstMessage["role"] == "system") {
- bool addCustomPrompt =
- request->get("add_custom_system_prompt", true).asBool();
- if (addCustomPrompt) {
- firstMessage["content"] =
- system_prompt + "\n" + firstMessage["content"].asString();
- }
- } else {
- // If the first message is not a system message, prepend the system prompt
- Json::Value systemMessage;
- systemMessage["role"] = "system";
- systemMessage["content"] = system_prompt;
- (*request)["messages"].insert(0, systemMessage);
- }
- }
-
- // transform last message role to tool if it is a function call
- Json::Value& lastMessage =
- (*request)["messages"][(*request)["messages"].size() - 1];
- if (lastMessage.get("role", "") == "tool") {
- lastMessage["role"] = function_calling_llama3_1_utils::tool_role;
- (*request)["stream"] =
- original_stream_config; // if role is tool then should restore stream config to original value
- }
- }
- for (Json::Value& message : (*request)["messages"]) {
- if (message["role"] == "assistant" && message.isMember("tool_calls")) {
- const Json::Value& tool_calls = message["tool_calls"];
- if (!tool_calls.isNull() && tool_calls.isArray() &&
- tool_calls.size() > 0) {
- message["content"] = ConvertJsonToFunctionStrings(tool_calls);
- message["tool_calls"] = {};
- }
- }
- }
-}
-inline void PreprocessRequest(std::shared_ptr request) {
- if (!function_calling_utils::HasTools(request)) {
- return; // Exit if no tools present
- }
- if (request->get("tool_choice", "auto").isString()) {
- std::string tool_choice = request->get("tool_choice", "auto").asString();
- if (tool_choice == "none") {
- return; // Exit if tool_choice is none
- }
- }
- std::string customFunctionsString =
- function_calling_utils::CreateCustomFunctionsString(request);
- std::string new_system_prompt =
- function_calling_utils::ReplaceCustomFunctions(
- function_calling_llama3_1_utils::system_prompt,
- customFunctionsString);
- UpdateMessages(new_system_prompt, request);
-}
-
-inline void PostProcessResponse(Json::Value& response) {
- if (!response.isMember("choices") || !response["choices"].isArray() ||
- response["choices"].empty()) {
- // If there are no choices or the structure is incorrect, do nothing
- return;
- }
-
- // Get a reference to the first choice
- Json::Value& firstChoice = response["choices"][0];
-
- // Check if the choice has a message with content
- if (firstChoice.isMember("message") &&
- firstChoice["message"].isMember("content")) {
- std::string content = firstChoice["message"]["content"].asString();
-
- // Create a new structure for tool_calls
- Json::Value toolCall = ParseMultipleFunctionStrings(content);
- if (toolCall.size() > 0) {
- // Add tool_calls to the message
- if (response.get("tool_choice", "auto").isString()) {
- std::string tool_choice =
- response.get("tool_choice", "auto").asString();
- if (tool_choice == "auto") {
- firstChoice["finish_reason"] = "tool_calls";
- } else {
- firstChoice["finish_reason"] = "stop";
- }
- }
-
- firstChoice["message"]["tool_calls"] = toolCall;
-
- // Clear the content as it's now represented in tool_calls
- firstChoice["message"]["content"] = "";
- }
- }
-
- // Add any additional post-processing logic here
-}
} // namespace function_calling_utils
diff --git a/function-calling.py b/function-calling.py
new file mode 100644
index 000000000..32ef31752
--- /dev/null
+++ b/function-calling.py
@@ -0,0 +1,173 @@
+from datetime import datetime
+from openai import OpenAI
+from pydantic import BaseModel
+import json
+
+# MODEL = "deepseek-r1-distill-qwen-7b:7b"
+MODEL = "llama3.1:8b-q8"
+
+client = OpenAI(
+ base_url="http://localhost:39281/v1",
+ api_key="not-needed", # Authentication is not required for local deployment
+)
+
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_navigate",
+ "description": "Navigate to a URL",
+ "parameters": {
+ "properties": {"url": {"type": "string"}},
+ "required": ["url"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_screenshot",
+ "description": "Take a screenshot of the current page or a specific element",
+ "parameters": {
+ "properties": {
+ "height": {
+ "description": "Height in pixels (default: 600)",
+ "type": "number",
+ },
+ "name": {
+ "description": "Name for the screenshot",
+ "type": "string",
+ },
+ "selector": {
+ "description": "CSS selector for element to screenshot",
+ "type": "string",
+ },
+ "width": {
+ "description": "Width in pixels (default: 800)",
+ "type": "number",
+ },
+ },
+ "required": ["name"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_click",
+ "description": "Click an element on the page",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for element to click",
+ "type": "string",
+ }
+ },
+ "required": ["selector"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_fill",
+ "description": "Fill out an input field",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for input field",
+ "type": "string",
+ },
+ "value": {"description": "Value to fill", "type": "string"},
+ },
+ "required": ["selector", "value"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_select",
+ "description": "Select an element on the page with Select tag",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for element to select",
+ "type": "string",
+ },
+ "value": {"description": "Value to select", "type": "string"},
+ },
+ "required": ["selector", "value"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_hover",
+ "description": "Hover an element on the page",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for element to hover",
+ "type": "string",
+ }
+ },
+ "required": ["selector"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_evaluate",
+ "description": "Execute JavaScript in the browser console",
+ "parameters": {
+ "properties": {
+ "script": {
+ "description": "JavaScript code to execute",
+ "type": "string",
+ }
+ },
+ "required": ["script"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+]
+
+completion_payload = {
+ "messages": [
+ {
+ "role": "system",
+ "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+ },
+ {
+ "role": "user",
+ "content": "go to google search",
+ },
+ ]
+}
+
+response = client.chat.completions.create(
+ top_p=0.9,
+ temperature=0.6,
+ model=MODEL,
+ messages=completion_payload["messages"],
+ tools=tools,
+)
+
+print(response)
\ No newline at end of file