diff --git a/.github/patches/windows/msvcp140.dll b/.github/patches/windows/msvcp140.dll
index f999742d9..d3d103ee0 100644
Binary files a/.github/patches/windows/msvcp140.dll and b/.github/patches/windows/msvcp140.dll differ
diff --git a/.github/patches/windows/vcruntime140.dll b/.github/patches/windows/vcruntime140.dll
index 3a4aded20..8edab904f 100644
Binary files a/.github/patches/windows/vcruntime140.dll and b/.github/patches/windows/vcruntime140.dll differ
diff --git a/.github/patches/windows/vcruntime140_1.dll b/.github/patches/windows/vcruntime140_1.dll
index 3ebabdee6..2ef481dbf 100644
Binary files a/.github/patches/windows/vcruntime140_1.dll and b/.github/patches/windows/vcruntime140_1.dll differ
diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml
index 1bf324d96..64d4e28e7 100644
--- a/.github/workflows/beta-build.yml
+++ b/.github/workflows/beta-build.yml
@@ -9,7 +9,7 @@ jobs:
get-update-version:
uses: ./.github/workflows/template-get-update-version.yml
- get-cortex-llamacpp-latest-version:
+ get-llamacpp-latest-version:
uses: ./.github/workflows/template-cortex-llamacpp-latest-version.yml
create-draft-release:
@@ -39,7 +39,7 @@ jobs:
build-macos:
uses: ./.github/workflows/template-build-macos.yml
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
secrets: inherit
with:
ref: ${{ github.ref }}
@@ -48,12 +48,12 @@ jobs:
cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
channel: beta
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
build-windows-x64:
uses: ./.github/workflows/template-build-windows-x64.yml
secrets: inherit
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
with:
ref: ${{ github.ref }}
public_provider: github
@@ -64,12 +64,12 @@ jobs:
ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
channel: beta
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
build-linux-x64:
uses: ./.github/workflows/template-build-linux.yml
secrets: inherit
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
with:
ref: ${{ github.ref }}
public_provider: github
@@ -78,28 +78,28 @@ jobs:
cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
channel: beta
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
arch: amd64
- build-linux-arm64:
- uses: ./.github/workflows/template-build-linux.yml
- secrets: inherit
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
- with:
- ref: ${{ github.ref }}
- public_provider: github
- new_version: ${{ needs.get-update-version.outputs.new_version }}
- runs-on: ubuntu-2004-arm64
- cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
- channel: beta
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
- arch: arm64
+ # build-linux-arm64:
+ # uses: ./.github/workflows/template-build-linux.yml
+ # secrets: inherit
+ # needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
+ # with:
+ # ref: ${{ github.ref }}
+ # public_provider: github
+ # new_version: ${{ needs.get-update-version.outputs.new_version }}
+ # runs-on: ubuntu-2004-arm64
+ # cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # channel: beta
+ # upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+ # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
+ # arch: arm64
build-docker-x64:
uses: ./.github/workflows/template-build-docker-x64.yml
secrets: inherit
- needs: [get-update-version, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, get-llamacpp-latest-version]
with:
ref: ${{ github.ref }}
new_version: ${{ needs.get-update-version.outputs.new_version }}
@@ -127,7 +127,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
noti-discord:
- needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, build-linux-arm64, update_release]
+ needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, update_release]
runs-on: ubuntu-latest
permissions:
contents: write
diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 279dd77d6..02774d159 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -21,12 +21,12 @@ jobs:
fail-fast: false
matrix:
include:
- - os: "linux"
- name: "arm64"
- runs-on: "ubuntu-2004-arm64"
- cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
- build-deps-cmake-flags: ""
- ccache-dir: ""
+ # - os: "linux"
+ # name: "arm64"
+ # runs-on: "ubuntu-2004-arm64"
+ # cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # build-deps-cmake-flags: ""
+ # ccache-dir: ""
- os: "linux"
name: "amd64"
runs-on: "ubuntu-20-04-cuda-12-0"
@@ -150,6 +150,7 @@ jobs:
run: |
cd engine
mkdir -p ~/.config/cortexcpp/
+ mkdir -p ~/.local/share/cortexcpp/
echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" > ~/.config/cortexcpp/.cortexrc
echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.config/cortexcpp/.cortexrc
# ./build/cortex
@@ -177,6 +178,7 @@ jobs:
run: |
cd engine
mkdir -p ~/.config/cortexcpp/
+ mkdir -p ~/.local/share/cortexcpp/
echo "apiServerPort: 3928" > ~/.config/cortexcpp/.cortexrc
echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" >> ~/.config/cortexcpp/.cortexrc
echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.config/cortexcpp/.cortexrc
@@ -352,12 +354,12 @@ jobs:
fail-fast: false
matrix:
include:
- - os: "linux"
- name: "arm64"
- runs-on: "ubuntu-2004-arm64"
- cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
- build-deps-cmake-flags: ""
- ccache-dir: ""
+ # - os: "linux"
+ # name: "arm64"
+ # runs-on: "ubuntu-2004-arm64"
+ # cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # build-deps-cmake-flags: ""
+ # ccache-dir: ""
- os: "linux"
name: "amd64"
runs-on: "ubuntu-20-04-cuda-12-0"
@@ -456,6 +458,7 @@ jobs:
run: |
cd engine
mkdir -p ~/.config/cortexcpp/
+ mkdir -p ~/.local/share/cortexcpp/
echo "gitHubToken: ${{ secrets.GITHUB_TOKEN }}" > ~/.config/cortexcpp/.cortexrc
# ./build/cortex
cat ~/.config/cortexcpp/.cortexrc
@@ -481,6 +484,7 @@ jobs:
run: |
cd engine
mkdir -p ~/.config/cortexcpp/
+ mkdir -p ~/.local/share/cortexcpp/
echo "apiServerPort: 3928" > ~/.config/cortexcpp/.cortexrc
echo "gitHubToken: ${{ secrets.GITHUB_TOKEN }}" > ~/.config/cortexcpp/.cortexrc
# ./build/cortex
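Note: both quality-gate test steps above now create the data directory `~/.local/share/cortexcpp/` alongside the config directory before writing `.cortexrc`. A minimal Python sketch of that bootstrap (illustrative only, not part of the repo; token values are placeholders and the `.cortexrc` keys are the ones used in the workflow):

```python
# Sketch of the CI bootstrap performed by the quality-gate "run:" steps above.
# Paths and .cortexrc keys come from the workflow; the values are placeholders.
from pathlib import Path


def bootstrap_cortex_dirs(hf_token: str = "<hf-token>",
                          gh_token: str = "<gh-token>",
                          api_port: int | None = 3928) -> Path:
    config_dir = Path.home() / ".config" / "cortexcpp"
    data_dir = Path.home() / ".local" / "share" / "cortexcpp"  # newly required
    config_dir.mkdir(parents=True, exist_ok=True)
    data_dir.mkdir(parents=True, exist_ok=True)

    lines = []
    if api_port is not None:
        lines.append(f"apiServerPort: {api_port}")
    lines.append(f"huggingFaceToken: {hf_token}")
    lines.append(f"gitHubToken: {gh_token}")

    rc = config_dir / ".cortexrc"
    rc.write_text("\n".join(lines) + "\n")
    return rc


if __name__ == "__main__":
    print(bootstrap_cortex_dirs().read_text())
```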
diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml
index 1f076dc97..f013a90e2 100644
--- a/.github/workflows/nightly-build.yml
+++ b/.github/workflows/nightly-build.yml
@@ -43,12 +43,12 @@ jobs:
get-update-version:
uses: ./.github/workflows/template-get-update-version.yml
- get-cortex-llamacpp-latest-version:
+ get-llamacpp-latest-version:
uses: ./.github/workflows/template-cortex-llamacpp-latest-version.yml
build-macos:
uses: ./.github/workflows/template-build-macos.yml
- needs: [get-update-version, set-public-provider, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, set-public-provider, get-llamacpp-latest-version]
secrets: inherit
with:
ref: ${{ needs.set-public-provider.outputs.ref }}
@@ -56,12 +56,12 @@ jobs:
new_version: ${{ needs.get-update-version.outputs.new_version }}
cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
channel: nightly
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
build-windows-x64:
uses: ./.github/workflows/template-build-windows-x64.yml
secrets: inherit
- needs: [get-update-version, set-public-provider, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, set-public-provider, get-llamacpp-latest-version]
with:
ref: ${{ needs.set-public-provider.outputs.ref }}
public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -71,12 +71,12 @@ jobs:
build-deps-cmake-flags: "-DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
channel: nightly
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
build-linux-x64:
uses: ./.github/workflows/template-build-linux.yml
secrets: inherit
- needs: [get-update-version, set-public-provider, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, set-public-provider, get-llamacpp-latest-version]
with:
ref: ${{ needs.set-public-provider.outputs.ref }}
public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -84,27 +84,27 @@ jobs:
runs-on: ubuntu-20-04
cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
channel: nightly
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
arch: amd64
- build-linux-arm64:
- uses: ./.github/workflows/template-build-linux.yml
- secrets: inherit
- needs: [get-update-version, set-public-provider, get-cortex-llamacpp-latest-version]
- with:
- ref: ${{ needs.set-public-provider.outputs.ref }}
- public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
- new_version: ${{ needs.get-update-version.outputs.new_version }}
- runs-on: ubuntu-2004-arm64
- cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
- channel: nightly
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
- arch: arm64
+ # build-linux-arm64:
+ # uses: ./.github/workflows/template-build-linux.yml
+ # secrets: inherit
+ # needs: [get-update-version, set-public-provider, get-llamacpp-latest-version]
+ # with:
+ # ref: ${{ needs.set-public-provider.outputs.ref }}
+ # public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
+ # new_version: ${{ needs.get-update-version.outputs.new_version }}
+ # runs-on: ubuntu-2004-arm64
+ # cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # channel: nightly
+ # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
+ # arch: arm64
update-latest-version:
runs-on: ubuntu-latest
if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
- needs: [get-update-version, set-public-provider, build-linux-x64, build-linux-arm64, build-macos, build-windows-x64, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, set-public-provider, build-linux-x64, build-macos, build-windows-x64, get-llamacpp-latest-version]
steps:
- name: Update latest version
id: update-latest-version
@@ -132,7 +132,7 @@ jobs:
if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
uses: ./.github/workflows/template-build-docker-x64.yml
secrets: inherit
- needs: [get-update-version, set-public-provider, get-cortex-llamacpp-latest-version, update-latest-version]
+ needs: [get-update-version, set-public-provider, get-llamacpp-latest-version, update-latest-version]
with:
ref: ${{ needs.set-public-provider.outputs.ref }}
new_version: nightly-${{ needs.get-update-version.outputs.new_version }}
@@ -141,7 +141,7 @@ jobs:
tags: menloltd/cortex:nightly-${{ needs.get-update-version.outputs.new_version }}
noti-discord-nightly-and-update-url-readme:
- needs: [build-macos, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, get-cortex-llamacpp-latest-version, update-latest-version, build-docker-x64]
+ needs: [build-macos, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, get-llamacpp-latest-version, update-latest-version, build-docker-x64]
secrets: inherit
if: github.event_name == 'schedule'
uses: ./.github/workflows/template-noti-discord.yaml
@@ -150,7 +150,7 @@ jobs:
new_version: ${{ needs.get-update-version.outputs.new_version }}
noti-discord-manual:
- needs: [build-macos, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, get-cortex-llamacpp-latest-version, build-docker-x64]
+ needs: [build-macos, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, get-llamacpp-latest-version, build-docker-x64]
secrets: inherit
if: github.event_name == 'workflow_dispatch' && github.event.inputs.public_provider == 'aws-s3'
uses: ./.github/workflows/template-noti-discord.yaml
diff --git a/.github/workflows/stable-build.yml b/.github/workflows/stable-build.yml
index b05df983d..27e05f9ce 100644
--- a/.github/workflows/stable-build.yml
+++ b/.github/workflows/stable-build.yml
@@ -9,7 +9,7 @@ jobs:
get-update-version:
uses: ./.github/workflows/template-get-update-version.yml
- get-cortex-llamacpp-latest-version:
+ get-llamacpp-latest-version:
uses: ./.github/workflows/template-cortex-llamacpp-latest-version.yml
create-draft-release:
@@ -39,7 +39,7 @@ jobs:
build-macos:
uses: ./.github/workflows/template-build-macos.yml
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
secrets: inherit
with:
ref: ${{ github.ref }}
@@ -48,12 +48,12 @@ jobs:
cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
channel: stable
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
build-windows-x64:
uses: ./.github/workflows/template-build-windows-x64.yml
secrets: inherit
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
with:
ref: ${{ github.ref }}
public_provider: github
@@ -64,12 +64,12 @@ jobs:
ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
channel: stable
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
build-linux-x64:
uses: ./.github/workflows/template-build-linux.yml
secrets: inherit
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
with:
ref: ${{ github.ref }}
public_provider: github
@@ -78,28 +78,28 @@ jobs:
cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
channel: stable
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
arch: amd64
- build-linux-arm64:
- uses: ./.github/workflows/template-build-linux.yml
- secrets: inherit
- needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
- with:
- ref: ${{ github.ref }}
- public_provider: github
- new_version: ${{ needs.get-update-version.outputs.new_version }}
- runs-on: ubuntu-2004-arm64
- cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
- channel: stable
- upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
- cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
- arch: arm64
+ # build-linux-arm64:
+ # uses: ./.github/workflows/template-build-linux.yml
+ # secrets: inherit
+ # needs: [get-update-version, create-draft-release, get-llamacpp-latest-version]
+ # with:
+ # ref: ${{ github.ref }}
+ # public_provider: github
+ # new_version: ${{ needs.get-update-version.outputs.new_version }}
+ # runs-on: ubuntu-2004-arm64
+ # cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ # channel: stable
+ # upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+ # llamacpp-version: ${{ needs.get-llamacpp-latest-version.outputs.llamacpp_latest_version }}
+ # arch: arm64
build-docker-x64:
uses: ./.github/workflows/template-build-docker-x64.yml
secrets: inherit
- needs: [get-update-version, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, get-llamacpp-latest-version]
with:
ref: ${{ github.ref }}
new_version: ${{ needs.get-update-version.outputs.new_version }}
diff --git a/.github/workflows/template-build-linux.yml b/.github/workflows/template-build-linux.yml
index 3fa802ad4..0ebd04176 100644
--- a/.github/workflows/template-build-linux.yml
+++ b/.github/workflows/template-build-linux.yml
@@ -44,7 +44,7 @@ on:
type: string
default: 'nightly'
description: 'The channel to use for this job'
- cortex-llamacpp-version:
+ llamacpp-version:
required: true
type: string
default: '0.0.0'
@@ -169,23 +169,23 @@ jobs:
mkdir -p engine/templates/linux/dependencies
cd engine/templates/linux/dependencies
if [ "${{ inputs.arch }}" == "amd64" ]; then
- # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-11-7.tar.gz
- # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-12-0.tar.gz
- # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-11-7.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-12-0.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2.tar.gz
- # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-11-7.tar.gz
- # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-12-0.tar.gz
- # wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-11-7.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-12-0.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-vulkan.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-linux-amd64.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-linux-amd64.tar.gz
+ # wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx-cuda-cu11.7-x64.tar.gz
+ # wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx-cuda-cu12.0-x64.tar.gz
+ # wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx-x64.tar.gz
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx2-cuda-cu11.7-x64.tar.gz
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx2-cuda-cu12.0-x64.tar.gz
+ wget https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-ubuntu-x64.zip
+ # wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx512-cuda-cu11.7-x64.tar.gz
+ # wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx512-cuda-cu12.0-x64.tar.gz
+ # wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-avx512-x64.tar.gz
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-noavx-cuda-cu11.7-x64.tar.gz
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-noavx-cuda-cu12.0-x64.tar.gz
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-linux-noavx-x64.tar.gz
+ wget https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-ubuntu-vulkan-x64.zip
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/cudart-llama-bin-linux-cu11.7-x64.tar.gz
+          wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/cudart-llama-bin-linux-cu12.0-x64.tar.gz
else
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-arm64.tar.gz
+ wget https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-ubuntu-arm64.zip
fi
cd ..
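The Linux dependency step now mixes two release sources: CUDA and noavx tarballs from menloresearch/llama.cpp, and generic/Vulkan Ubuntu zips from ggml-org/llama.cpp, with the release tag used verbatim (no `v` prefix). A hedged Python sketch that assembles the same amd64 download list for a given tag; asset names are copied from the uncommented `wget` lines above, and "b4920" is only an example tag:

```python
# Illustrative helper that rebuilds the amd64 URL list fetched by the workflow
# step above for a given llama.cpp release tag (e.g. "b4920").
MENLO = "https://github.com/menloresearch/llama.cpp/releases/download"
GGML = "https://github.com/ggml-org/llama.cpp/releases/download"


def linux_amd64_assets(tag: str) -> list[str]:
    menlo_assets = [
        f"llama-{tag}-bin-linux-avx2-cuda-cu11.7-x64.tar.gz",
        f"llama-{tag}-bin-linux-avx2-cuda-cu12.0-x64.tar.gz",
        f"llama-{tag}-bin-linux-noavx-cuda-cu11.7-x64.tar.gz",
        f"llama-{tag}-bin-linux-noavx-cuda-cu12.0-x64.tar.gz",
        f"llama-{tag}-bin-linux-noavx-x64.tar.gz",
        "cudart-llama-bin-linux-cu11.7-x64.tar.gz",
        "cudart-llama-bin-linux-cu12.0-x64.tar.gz",
    ]
    ggml_assets = [
        f"llama-{tag}-bin-ubuntu-x64.zip",
        f"llama-{tag}-bin-ubuntu-vulkan-x64.zip",
    ]
    return ([f"{MENLO}/{tag}/{name}" for name in menlo_assets]
            + [f"{GGML}/{tag}/{name}" for name in ggml_assets])


if __name__ == "__main__":
    print("\n".join(linux_amd64_assets("b4920")))
```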
diff --git a/.github/workflows/template-build-macos.yml b/.github/workflows/template-build-macos.yml
index 20c7430fb..ea96d2df6 100644
--- a/.github/workflows/template-build-macos.yml
+++ b/.github/workflows/template-build-macos.yml
@@ -39,7 +39,7 @@ on:
type: string
default: 'nightly'
description: 'The channel to use for this job'
- cortex-llamacpp-version:
+ llamacpp-version:
required: true
type: string
default: '0.0.0'
@@ -253,6 +253,14 @@ jobs:
cd engine
make codesign-binary CODE_SIGN=true DEVELOPER_ID="${{ secrets.DEVELOPER_ID }}" DESTINATION_BINARY_NAME="${{ steps.set-output-params.outputs.destination_binary_name }}" DESTINATION_BINARY_SERVER_NAME="${{ steps.set-output-params.outputs.destination_binary_server_name }}"
+ - name: Code Signing binaries for separate binary
+ run: |
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ codesign --force -s "${{ secrets.DEVELOPER_ID }}" --options=runtime --entitlements="./engine/templates/macos/entitlements.plist" ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+
- name: Notary macOS Binary
run: |
curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sh -s -- -b /usr/local/bin
@@ -265,6 +273,18 @@ jobs:
QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }}
QUILL_NOTARY_KEY: "/tmp/notary-key.p8"
+ - name: Notary macOS Binary for separate binary
+ run: |
+ # Notarize the binary
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-arm64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_name }}
+ quill notarize ./cortex-${{ inputs.new_version }}-mac-amd64/${{ steps.set-output-params.outputs.destination_binary_server_name }}
+ env:
+ QUILL_NOTARY_KEY_ID: ${{ secrets.NOTARY_KEY_ID }}
+ QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }}
+ QUILL_NOTARY_KEY: "/tmp/notary-key.p8"
+
- name: Build network Installers
shell: bash
run: |
@@ -289,8 +309,8 @@ jobs:
run: |
mkdir -p engine/templates/macos/Scripts/dependencies
cd engine/templates/macos/Scripts/dependencies
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-mac-arm64.tar.gz
- wget https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-mac-amd64.tar.gz
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-macos-arm64.tar.gz
+ wget https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-macos-x64.tar.gz
cd ../../
chmod +x create_pkg_local.sh
@@ -310,6 +330,24 @@ jobs:
xcrun notarytool submit ${{ steps.set-output-params.outputs.package_name }}-local.pkg --apple-id ${{ secrets.APPLE_ID }} --password ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} --team-id ${{ secrets.APPLE_TEAM_ID }} --wait
- name: Package
+ run: |
+ mkdir temp
+ # Mac arm64
+ mv cortex-${{ inputs.new_version }}-mac-arm64 temp/cortex
+ cd temp
+ tar -czvf cortex-arm64.tar.gz cortex
+ mv cortex-arm64.tar.gz ../cortex-arm64.tar.gz
+ cd ..
+ rm -rf temp/cortex
+
+ # Mac amd64
+ mv cortex-${{ inputs.new_version }}-mac-amd64 temp/cortex
+ cd temp
+ tar -czvf cortex-amd64.tar.gz cortex
+ mv cortex-amd64.tar.gz ../cortex-amd64.tar.gz
+ cd ..
+
+ - name: Package for separate binary
run: |
cd engine
make package
@@ -320,6 +358,18 @@ jobs:
name: cortex-${{ inputs.new_version }}-mac-universal
path: ./engine/cortex
+ - name: Upload Artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: cortex-${{ inputs.new_version }}-mac-arm64-signed
+ path: ./cortex-${{ inputs.new_version }}-mac-arm64
+
+ - name: Upload Artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: cortex-${{ inputs.new_version }}-mac-amd64-signed
+ path: ./cortex-${{ inputs.new_version }}-mac-amd64
+
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
@@ -358,6 +408,28 @@ jobs:
asset_name: cortex-${{ inputs.new_version }}-mac-universal.tar.gz
asset_content_type: application/zip
+      - name: Upload release asset if public provider is github
+ if: inputs.public_provider == 'github'
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ uses: actions/upload-release-asset@v1.0.1
+ with:
+ upload_url: ${{ inputs.upload_url }}
+ asset_path: ./cortex-arm64.tar.gz
+ asset_name: cortex-${{ inputs.new_version }}-mac-arm64.tar.gz
+ asset_content_type: application/zip
+
+      - name: Upload release asset if public provider is github
+ if: inputs.public_provider == 'github'
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ uses: actions/upload-release-asset@v1.0.1
+ with:
+ upload_url: ${{ inputs.upload_url }}
+ asset_path: ./cortex-amd64.tar.gz
+ asset_name: cortex-${{ inputs.new_version }}-mac-amd64.tar.gz
+ asset_content_type: application/zip
+
- name: Upload release asset if public provider is github
if: inputs.public_provider == 'github'
env:
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index b9e0c9937..399e3dd3e 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -44,7 +44,7 @@ on:
type: string
default: 'nightly'
description: 'The channel to use for this job'
- cortex-llamacpp-version:
+ llamacpp-version:
required: true
type: string
default: '0.0.0'
@@ -205,21 +205,21 @@ jobs:
run: |
mkdir dependencies
cd dependencies
- # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx-cuda-11-7.tar.gz
- # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx-cuda-12-0.tar.gz
- # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2-cuda-11-7.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2-cuda-12-0.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx2.tar.gz
- # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512-cuda-11-7.tar.gz
- # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512-cuda-12-0.tar.gz
- # wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-avx512.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx-cuda-11-7.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx-cuda-12-0.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-noavx.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-windows-amd64-vulkan.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-windows-amd64.tar.gz
- wget.exe https://github.com/menloresearch/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-windows-amd64.tar.gz
+ # wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx-cuda-cu11.7-x64.tar.gz
+ # wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx-cuda-cu12.0-x64.tar.gz
+ # wget.exe https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx-x64.zip
+ wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx2-cuda-cu11.7-x64.tar.gz
+ wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx2-cuda-cu12.0-x64.tar.gz
+ wget.exe https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx2-x64.zip
+ # wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx512-cuda-cu11.7-x64.tar.gz
+ # wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx512-cuda-cu12.0-x64.tar.gz
+ # wget.exe https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-avx512-x64.zip
+ wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-noavx-cuda-cu11.7-x64.tar.gz
+ wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-noavx-cuda-cu12.0-x64.tar.gz
+ wget.exe https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-noavx-x64.zip
+ wget.exe https://github.com/ggml-org/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/llama-${{ inputs.llamacpp-version }}-bin-win-vulkan-x64.zip
+ wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/cudart-llama-bin-win-cu11.7-x64.tar.gz
+ wget.exe https://github.com/menloresearch/llama.cpp/releases/download/${{ inputs.llamacpp-version }}/cudart-llama-bin-win-cu12.0-x64.tar.gz
- name: Enable long paths
run: |
diff --git a/.github/workflows/template-cortex-llamacpp-latest-version.yml b/.github/workflows/template-cortex-llamacpp-latest-version.yml
index 610b1a89a..3d7b74e56 100644
--- a/.github/workflows/template-cortex-llamacpp-latest-version.yml
+++ b/.github/workflows/template-cortex-llamacpp-latest-version.yml
@@ -1,13 +1,13 @@
-name: get-cortex-llamacpp-latest-version
+name: get-llamacpp-latest-version
on:
workflow_call:
outputs:
- cortex_llamacpp_latest_version:
+ llamacpp_latest_version:
description: 'The latest version of cortex.llamacpp engines'
- value: ${{ jobs.get-cortex-llamacpp-latest-version.outputs.new_version }}
+ value: ${{ jobs.get-llamacpp-latest-version.outputs.new_version }}
jobs:
- get-cortex-llamacpp-latest-version:
+ get-llamacpp-latest-version:
runs-on: ubuntu-latest
outputs:
new_version: ${{ steps.version_update.outputs.new_version }}
@@ -24,7 +24,7 @@ jobs:
local max_retries=3
local tag
while [ $retries -lt $max_retries ]; do
- tag=$(curl -s https://api.github.com/repos/menloresearch/cortex.llamacpp/releases/latest | jq -r .tag_name)
+ tag=$(curl -s https://api.github.com/repos/menloresearch/llama.cpp/releases/latest | jq -r .tag_name)
if [ -n "$tag" ] && [ "$tag" != "null" ]; then
echo $tag
return
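The version-lookup template now resolves the latest release tag of menloresearch/llama.cpp instead of cortex.llamacpp, retrying the GitHub API a few times before giving up. A rough Python equivalent of that shell loop (illustrative only; the real step uses curl and jq in bash):

```python
# Approximation of the retry loop in the workflow step above: ask the GitHub
# API for the latest release tag, retrying up to three times.
import time

import requests


def get_latest_llamacpp_tag(max_retries: int = 3, delay_s: float = 2.0) -> str:
    url = "https://api.github.com/repos/menloresearch/llama.cpp/releases/latest"
    for _ in range(max_retries):
        resp = requests.get(url, timeout=10)
        if resp.ok:
            tag = resp.json().get("tag_name")
            if tag:
                return tag  # e.g. "b4920", used verbatim as the version
        time.sleep(delay_s)
    raise RuntimeError("could not determine the latest llama.cpp release tag")


if __name__ == "__main__":
    print(get_latest_llamacpp_tag())
```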
diff --git a/README.md b/README.md
index 5cd51ece1..f56842d29 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,10 @@
+🚨 Archived Repository Notice
+
+This repository is no longer actively maintained.
+
+Development has moved to menloresearch/llama.cpp.
+
+Please contribute directly to llama.cpp moving forward.
# Cortex
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 744c3899c..5f04da12e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -24,7 +24,6 @@ RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/nul
apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \
apt-get update && \
apt-get install -y --no-install-recommends \
- cmake \
make \
git \
uuid-dev \
@@ -37,11 +36,21 @@ RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/nul
ninja-build \
pkg-config \
python3-pip \
- openssl && \
+ openssl \
+ libssl-dev && \
pip3 install awscli && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
+# Download and install CMake 3.22.6
+RUN wget https://github.com/Kitware/CMake/releases/download/v3.22.6/cmake-3.22.6.tar.gz -q -O /tmp/cmake.tar.gz && \
+ tar -xzf /tmp/cmake.tar.gz -C /tmp && \
+ cd /tmp/cmake-3.22.6 && \
+ ./bootstrap && \
+ make -j$(nproc) && \
+ make install && \
+ rm -rf /tmp/cmake.tar.gz /tmp/cmake-3.22.6
+
ARG CORTEX_CPP_VERSION=latest
ARG CMAKE_EXTRA_FLAGS=""
diff --git a/docker/Dockerfile.cache b/docker/Dockerfile.cache
index 0a9cbe02d..3eabc5dce 100644
--- a/docker/Dockerfile.cache
+++ b/docker/Dockerfile.cache
@@ -24,7 +24,6 @@ RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/nul
apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" && \
apt-get update && \
apt-get install -y --no-install-recommends \
- cmake \
make \
git \
uuid-dev \
@@ -37,11 +36,21 @@ RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/nul
ninja-build \
pkg-config \
python3-pip \
- openssl && \
+ openssl \
+ libssl-dev && \
pip3 install awscli && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
+# Download and install CMake 3.22.6
+RUN wget https://github.com/Kitware/CMake/releases/download/v3.22.6/cmake-3.22.6.tar.gz -q -O /tmp/cmake.tar.gz && \
+ tar -xzf /tmp/cmake.tar.gz -C /tmp && \
+ cd /tmp/cmake-3.22.6 && \
+ ./bootstrap && \
+ make -j$(nproc) && \
+ make install && \
+ rm -rf /tmp/cmake.tar.gz /tmp/cmake-3.22.6
+
ARG CORTEX_CPP_VERSION=latest
ARG CMAKE_EXTRA_FLAGS=""
diff --git a/docs/docs/engines/engine-extension.mdx b/docs/docs/engines/engine-extension.mdx
index d2edde830..8b550c5a4 100644
--- a/docs/docs/engines/engine-extension.mdx
+++ b/docs/docs/engines/engine-extension.mdx
@@ -71,9 +71,6 @@ class EngineI {
      std::shared_ptr<Json::Value> json_body,
      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
- // Compatibility and model management
- virtual bool IsSupported(const std::string& f) = 0;
-
virtual void GetModels(
      std::shared_ptr<Json::Value> jsonBody,
      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
diff --git a/docs/docs/guides/function-calling.md b/docs/docs/guides/function-calling.md
index 6b9157f18..7725f225d 100644
--- a/docs/docs/guides/function-calling.md
+++ b/docs/docs/guides/function-calling.md
@@ -63,8 +63,14 @@ tools = [
completion_payload = {
"messages": [
- {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."},
- {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"},
+ {
+ "role": "system",
+ "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+ },
+ {
+ "role": "user",
+ "content": "Hi, can you tell me the delivery date for my order?"
+ },
]
}
@@ -126,10 +132,22 @@ Once the user provides their order ID:
```python
completion_payload = {
"messages": [
- {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."},
- {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"},
- {"role": "assistant", "content": "Of course! Please provide your order ID so I can look it up."},
- {"role": "user", "content": "i think it is order_70705"},
+ {
+ "role": "system",
+ "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+ },
+ {
+ "role": "user",
+ "content": "Hi, can you tell me the delivery date for my order?"
+ },
+ {
+ "role": "assistant",
+ "content": "Of course! Please provide your order ID so I can look it up."
+ },
+ {
+ "role": "user",
+ "content": "i think it is order_70705"
+ },
]
}
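For context, one hypothetical way to exercise the updated guide payload against a locally running cortex server; the endpoint path, port (39281), and model name below are assumptions for illustration, not values taken from the guide:

```python
# Hypothetical usage sketch: POST the guide's completion_payload to a local
# cortex server. Endpoint, port, and model name are assumptions; adjust them.
import requests


def chat(completion_payload: dict, model: str = "llama3.1:8b-gguf") -> dict:
    body = {"model": model, **completion_payload}
    resp = requests.post(
        "http://localhost:39281/v1/chat/completions",
        json=body,
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()
```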
diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
index 23970ef51..b7d628094 100644
--- a/docs/static/openapi/cortex.json
+++ b/docs/static/openapi/cortex.json
@@ -2754,7 +2754,7 @@
},
"version": {
"type": "string",
- "example": "0.1.35-28.10.24"
+ "example": "b4920"
}
}
}
@@ -2763,11 +2763,11 @@
{
"engine": "llama-cpp",
"name": "mac-arm64",
- "version": "0.1.35-28.10.24"
+ "version": "b4920"
},
{
"engine": "llama-cpp",
- "name": "linux-amd64-avx",
+ "name": "linux-avx-x64",
"version": "0.1.35-27.10.24"
}
]
@@ -2901,7 +2901,7 @@
"name": {
"type": "string",
"description": "The name of the variant, including OS, architecture, and capabilities",
- "example": "linux-amd64-avx-cuda-11-7"
+ "example": "linux-avx-x64-cuda-11-7"
},
"created_at": {
"type": "string",
@@ -2973,7 +2973,7 @@
},
"name": {
"type": "string",
- "example": "0.1.39-linux-amd64-avx-cuda-11-7"
+ "example": "llama-b4920-bin-linux-avx-cuda-cu11.7"
},
"size": {
"type": "integer",
@@ -3250,7 +3250,7 @@
},
"version": {
"type": "string",
- "example": "0.1.35-28.10.24"
+ "example": "b4920"
}
}
}
diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt
index f7a20b58b..39052b08e 100644
--- a/engine/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -182,6 +182,7 @@ add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/process/utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/extensions/local-engine/local_engine.cc
)
@@ -227,3 +228,12 @@ set_target_properties(${TARGET_NAME} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
)
+
+if(MSVC)
+ add_custom_command(
+ TARGET ${TARGET_NAME} POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_directory
+ ${CMAKE_CURRENT_SOURCE_DIR}/../.github/patches/windows
+ ${CMAKE_BINARY_DIR}/
+ )
+endif()
\ No newline at end of file
diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt
index 4163042d0..bb18433fe 100644
--- a/engine/cli/CMakeLists.txt
+++ b/engine/cli/CMakeLists.txt
@@ -73,7 +73,7 @@ add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/database_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc
-
+ ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/local-engine/local_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc
diff --git a/engine/cli/command_line_parser.cc b/engine/cli/command_line_parser.cc
index 99f51983e..aa0b9aab4 100644
--- a/engine/cli/command_line_parser.cc
+++ b/engine/cli/command_line_parser.cc
@@ -33,6 +33,7 @@
#include "services/engine_service.h"
#include "utils/file_manager_utils.h"
#include "utils/logging_utils.h"
+#include "utils/task_queue.h"
namespace {
constexpr const auto kCommonCommandsGroup = "Common Commands";
@@ -50,8 +51,7 @@ CommandLineParser::CommandLineParser()
      download_service_{std::make_shared<DownloadService>()},
      dylib_path_manager_{std::make_shared<cortex::DylibPathManager>()},
      db_service_{std::make_shared<DatabaseService>()},
-      engine_service_{std::make_shared<EngineService>(
-          download_service_, dylib_path_manager_, db_service_)} {}
+      engine_service_{std::make_shared<EngineService>(dylib_path_manager_)} {}
bool CommandLineParser::SetupCommand(int argc, char** argv) {
app_.usage("Usage:\n" + commands::GetCortexBinary() +
diff --git a/engine/cli/commands/cortex_upd_cmd.cc b/engine/cli/commands/cortex_upd_cmd.cc
index e11ad4290..33a51ed53 100644
--- a/engine/cli/commands/cortex_upd_cmd.cc
+++ b/engine/cli/commands/cortex_upd_cmd.cc
@@ -532,10 +532,10 @@ bool CortexUpdCmd::GetLinuxInstallScript(const std::string& v,
const std::string& channel) {
  std::vector<std::string> path_list;
if (channel == "nightly") {
- path_list = {"menloresearch", "cortex.cpp", "dev", "engine",
+ path_list = {kMenloOrg, "cortex.cpp", "dev", "engine",
"templates", "linux", "install.sh"};
} else {
- path_list = {"menloresearch", "cortex.cpp", "main", "engine",
+ path_list = {kMenloOrg, "cortex.cpp", "main", "engine",
"templates", "linux", "install.sh"};
}
auto url_obj = url_parser::Url{
diff --git a/engine/cli/commands/cortex_upd_cmd.h b/engine/cli/commands/cortex_upd_cmd.h
index 7f02839cf..fdee6cc49 100644
--- a/engine/cli/commands/cortex_upd_cmd.h
+++ b/engine/cli/commands/cortex_upd_cmd.h
@@ -79,9 +79,9 @@ inline std::vector<std::string> GetReleasePath() {
if (CORTEX_VARIANT == file_manager_utils::kNightlyVariant) {
return {"cortex", "latest", "version.json"};
} else if (CORTEX_VARIANT == file_manager_utils::kBetaVariant) {
- return {"repos", "menloresearch", "cortex.cpp", "releases"};
+ return {"repos", kMenloOrg, "cortex.cpp", "releases"};
} else {
- return {"repos", "menloresearch", "cortex.cpp", "releases", "latest"};
+ return {"repos", kMenloOrg, "cortex.cpp", "releases", "latest"};
}
}
diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc
index bebfdb8ce..b31aecaa6 100644
--- a/engine/cli/commands/engine_install_cmd.cc
+++ b/engine/cli/commands/engine_install_cmd.cc
@@ -92,7 +92,10 @@ bool EngineInstallCmd::Exec(const std::string& engine,
std::vector variant_selections;
for (const auto& variant : variant_result.value()) {
auto v_name = variant["name"].asString();
- if (string_utils::StringContainsIgnoreCase(v_name, hw_inf_.sys_inf->os) &&
+ if ((string_utils::StringContainsIgnoreCase(v_name,
+ hw_inf_.sys_inf->os) ||
+ (hw_inf_.sys_inf->os == kLinuxOs &&
+ string_utils::StringContainsIgnoreCase(v_name, kUbuntuOs))) &&
string_utils::StringContainsIgnoreCase(v_name,
hw_inf_.sys_inf->arch)) {
variant_selections.push_back(variant["name"].asString());
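The updated filter offers a variant when its name matches the host OS, or contains "ubuntu" while the host OS is Linux, so the ggml-org `...-bin-ubuntu-...` builds appear next to the `linux-...` ones. A small Python restatement of that predicate; the string literals stand in for the `kLinuxOs`/`kUbuntuOs` constants and the architecture check is simplified:

```python
# Simplified restatement of the variant-selection predicate changed above.
def variant_matches(variant_name: str, host_os: str, host_arch: str) -> bool:
    name = variant_name.lower()
    os_ok = host_os in name or (host_os == "linux" and "ubuntu" in name)
    return os_ok and host_arch in name


assert variant_matches("llama-b4920-bin-ubuntu-x64", "linux", "x64")
assert variant_matches("linux-avx-x64", "linux", "x64")
assert not variant_matches("llama-b4920-bin-win-avx2-x64", "linux", "x64")
```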
diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc
index af2d647e2..e074ee18a 100644
--- a/engine/cli/commands/server_start_cmd.cc
+++ b/engine/cli/commands/server_start_cmd.cc
@@ -106,10 +106,8 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
#else
  std::vector<std::string> commands;
// Some engines requires to add lib search path before process being created
-  auto download_srv = std::make_shared<DownloadService>();
-  auto dylib_path_mng = std::make_shared<cortex::DylibPathManager>();
-  auto db_srv = std::make_shared<DatabaseService>();
-  EngineService(download_srv, dylib_path_mng, db_srv).RegisterEngineLibPath();
+  EngineService(std::make_shared<cortex::DylibPathManager>())
+      .RegisterEngineLibPath();
std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
commands.push_back(p);
diff --git a/engine/cli/main.cc b/engine/cli/main.cc
index a4e6c38cc..1fa45d6fd 100644
--- a/engine/cli/main.cc
+++ b/engine/cli/main.cc
@@ -155,7 +155,7 @@ int main(int argc, char* argv[]) {
  auto get_latest_version = []() -> cpp::result<std::string, std::string> {
try {
auto res = github_release_utils::GetReleaseByVersion(
- "menloresearch", "cortex.llamacpp", "latest");
+ kGgmlOrg, kLlamaRepo, "latest");
if (res.has_error()) {
CTL_ERR("Failed to get latest llama.cpp version: " << res.error());
return cpp::fail("Failed to get latest llama.cpp version: " +
diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc
index 7538fff46..32cc6e20a 100644
--- a/engine/cli/utils/download_progress.cc
+++ b/engine/cli/utils/download_progress.cc
@@ -83,8 +83,8 @@ bool DownloadProgress::Handle(
size_t max_length = 20) -> std::string {
// Check the length of the input string
if (str.length() >= max_length) {
- return str.substr(
- 0, max_length); // Return truncated string if it's too long
+ return str.substr(0, max_length - 3) +
+ ".. "; // Return truncated string if it's too long
}
// Calculate the number of spaces needed
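The progress-bar label helper now appends ".. " when a name is cut to the column width instead of truncating it silently. A sketch of the same behavior in Python; the padding branch for short names is inferred from the surrounding code:

```python
# Python analogue of the label formatter after the change: long names are cut
# and suffixed with ".. " so truncation is visible, short names are padded.
def format_label(text: str, max_length: int = 20) -> str:
    if len(text) >= max_length:
        return text[: max_length - 3] + ".. "
    return text + " " * (max_length - len(text))


print(repr(format_label("tinyllama-1.1b-chat-gguf")))  # 'tinyllama-1.1b-ch.. '
print(repr(format_label("llama3")))  # 'llama3' padded to width 20
```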
diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
index 9650ffdcc..38128e1c4 100644
--- a/engine/config/yaml_config.cc
+++ b/engine/config/yaml_config.cc
@@ -48,7 +48,7 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
if (!yaml_node_["mmproj"]) {
auto s = nomalize_path(file_path);
auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
- CTL_DBG("mmproj: " << abs_path);
+ CTL_TRC("mmproj: " << abs_path);
auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
if (std::filesystem::exists(abs_path)) {
yaml_node_["mmproj"] = rel_path.string();
diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc
index f7deb41eb..2a9427abf 100644
--- a/engine/controllers/engines.cc
+++ b/engine/controllers/engines.cc
@@ -155,6 +155,7 @@ void Engines::GetEngineVariants(
releases.append(json.value());
}
}
+ CTL_INF(releases.toStyledString());
auto resp = cortex_utils::CreateCortexHttpJsonResponse(releases);
resp->setStatusCode(k200OK);
callback(resp);
@@ -177,6 +178,8 @@ void Engines::InstallEngine(
}
norm_version = version;
}
+ CTL_INF("version: " << norm_version
+ << ", norm_variant: " << norm_variant.value_or(""));
auto result =
engine_service_->InstallEngineAsync(engine, norm_version, norm_variant);
diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc
index 079b69423..3ba4aa327 100644
--- a/engine/controllers/server.cc
+++ b/engine/controllers/server.cc
@@ -138,7 +138,7 @@ void server::ProcessStreamRes(std::function cb,
auto err_or_done = std::make_shared(false);
auto chunked_content_provider = [this, q, err_or_done, engine_type, model_id](
char* buf,
- std::size_t buf_size) -> std::size_t {
+ std::size_t buf_size) -> std::size_t {
if (buf == nullptr) {
LOG_TRACE << "Buf is null";
if (!(*err_or_done)) {
@@ -179,7 +179,6 @@ void server::ProcessStreamRes(std::function cb,
void server::ProcessNonStreamRes(std::function cb,
SyncQueue& q) {
auto [status, res] = q.wait_and_pop();
- function_calling_utils::PostProcessResponse(res);
LOG_DEBUG << "response: " << res.toStyledString();
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
resp->setStatusCode(
diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h
index b796ebaed..2518b0ce5 100644
--- a/engine/cortex-common/EngineI.h
+++ b/engine/cortex-common/EngineI.h
@@ -47,9 +47,6 @@ class EngineI {
      std::shared_ptr<Json::Value> json_body,
      std::function<void(Json::Value&&, Json::Value&&)>&& callback) = 0;
- // For backward compatible checking
- virtual bool IsSupported(const std::string& f) = 0;
-
// Get list of running models
virtual void GetModels(
      std::shared_ptr<Json::Value> jsonBody,
diff --git a/engine/cortex-common/remote_enginei.h b/engine/cortex-common/remote_enginei.h
index 835f526a0..163490cdc 100644
--- a/engine/cortex-common/remote_enginei.h
+++ b/engine/cortex-common/remote_enginei.h
@@ -1,7 +1,5 @@
#pragma once
-#pragma once
-
#include
#include
diff --git a/engine/e2e-test/api/engines/test_api_engine.py b/engine/e2e-test/api/engines/test_api_engine.py
index 7356ef904..842ef2c35 100644
--- a/engine/e2e-test/api/engines/test_api_engine.py
+++ b/engine/e2e-test/api/engines/test_api_engine.py
@@ -28,14 +28,14 @@ def test_engines_get_llamacpp_should_be_successful(self):
# engines install
def test_engines_install_llamacpp_specific_version_and_variant(self):
- data = {"version": "v0.1.40-b4354", "variant": "linux-amd64-avx"}
+ data = {"version": "b4932", "variant": "linux-avx-x64"}
response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
assert response.status_code == 200
def test_engines_install_llamacpp_specific_version_and_null_variant(self):
- data = {"version": "v0.1.40-b4354"}
+ data = {"version": "b4932"}
response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
@@ -55,14 +55,14 @@ async def test_engines_install_uninstall_llamacpp_should_be_successful(self):
@pytest.mark.asyncio
async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_failed(self):
# install first
- data = {"variant": "mac-arm64"}
+ data = {"variant": "linux-avx-x64"}
install_response = requests.post(
"http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
)
await wait_for_websocket_download_success_event(timeout=120)
assert install_response.status_code == 200
- data = {"version": "v0.1.35"}
+ data = {"version": "b4932"}
response = requests.delete(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
@@ -72,7 +72,7 @@ async def test_engines_install_uninstall_llamacpp_with_only_version_should_be_fa
@pytest.mark.asyncio
async def test_engines_install_uninstall_llamacpp_with_variant_should_be_successful(self):
# install first
- data = {"variant": "mac-arm64"}
+ data = {"variant": "linux-avx-x64"}
install_response = requests.post(
"http://127.0.0.1:3928/v1/engines/llama-cpp/install", json=data
)
@@ -85,7 +85,7 @@ async def test_engines_install_uninstall_llamacpp_with_variant_should_be_success
def test_engines_install_uninstall_llamacpp_with_specific_variant_and_version_should_be_successful(
self,
):
- data = {"variant": "mac-arm64", "version": "v0.1.35"}
+ data = {"variant": "linux-avx-x64", "version": "b4932"}
# install first
install_response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
diff --git a/engine/e2e-test/api/engines/test_api_engine_install_nightly.py b/engine/e2e-test/api/engines/test_api_engine_install_nightly.py
index e92afb14b..088cc2474 100644
--- a/engine/e2e-test/api/engines/test_api_engine_install_nightly.py
+++ b/engine/e2e-test/api/engines/test_api_engine_install_nightly.py
@@ -2,7 +2,7 @@
import requests
from utils.test_runner import start_server, stop_server, get_latest_pre_release_tag
-latest_pre_release_tag = get_latest_pre_release_tag("menloresearch", "cortex.llamacpp")
+latest_pre_release_tag = get_latest_pre_release_tag("menloresearch", "llama.cpp")
class TestApiEngineInstall:
@@ -23,7 +23,7 @@ def test_engines_install_llamacpp_should_be_successful(self):
assert response.status_code == 200
def test_engines_install_llamacpp_specific_version_and_variant(self):
- data = {"version": latest_pre_release_tag, "variant": "linux-amd64-avx"}
+ data = {"version": latest_pre_release_tag, "variant": "linux-avx-x64"}
response = requests.post(
"http://localhost:3928/v1/engines/llama-cpp/install", json=data
)
diff --git a/engine/e2e-test/api/engines/test_api_get_default_engine.py b/engine/e2e-test/api/engines/test_api_get_default_engine.py
index 2dfc467a3..f0566128c 100644
--- a/engine/e2e-test/api/engines/test_api_get_default_engine.py
+++ b/engine/e2e-test/api/engines/test_api_get_default_engine.py
@@ -24,8 +24,8 @@ def setup_and_teardown(self):
def test_api_get_default_engine_successfully(self):
# Data test
engine= "llama-cpp"
- name= "linux-amd64-avx"
- version= "v0.1.35-27.10.24"
+ name= "linux-avx-x64"
+ version= "b4932"
data = {"version": version, "variant": name}
post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
diff --git a/engine/e2e-test/api/engines/test_api_get_list_engine.py b/engine/e2e-test/api/engines/test_api_get_list_engine.py
index e6baa22a6..38cb45b39 100644
--- a/engine/e2e-test/api/engines/test_api_get_list_engine.py
+++ b/engine/e2e-test/api/engines/test_api_get_list_engine.py
@@ -24,8 +24,8 @@ def setup_and_teardown(self):
def test_api_get_list_engines_successfully(self):
# Data test
engine= "llama-cpp"
- name= "linux-amd64-avx"
- version= "v0.1.35-27.10.24"
+ name= "linux-avx-x64"
+ version= "b4932"
post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
response = requests.delete(
diff --git a/engine/e2e-test/api/engines/test_api_post_default_engine.py b/engine/e2e-test/api/engines/test_api_post_default_engine.py
index b2b4e4c48..cede78485 100644
--- a/engine/e2e-test/api/engines/test_api_post_default_engine.py
+++ b/engine/e2e-test/api/engines/test_api_post_default_engine.py
@@ -23,8 +23,8 @@ def setup_and_teardown(self):
def test_api_set_default_engine_successfully(self):
# Data test
engine= "llama-cpp"
- name= "linux-amd64-avx"
- version= "v0.1.35-27.10.24"
+ name= "linux-avx-x64"
+ version= "b4932"
data = {"version": version, "variant": name}
post_install_url = f"http://localhost:3928/v1/engines/{engine}/install"
diff --git a/engine/e2e-test/api/files/test_api_create_file.py b/engine/e2e-test/api/files/test_api_create_file.py
index 7c7226f50..03525672d 100644
--- a/engine/e2e-test/api/files/test_api_create_file.py
+++ b/engine/e2e-test/api/files/test_api_create_file.py
@@ -23,7 +23,6 @@ def setup_and_teardown(self):
# Teardown
stop_server()
- @pytest.mark.skipif(platform.system() != "Linux", reason="Todo: fix later on Mac and Window")
def test_api_create_file_successfully(self):
# Define file path
file_path_rel = os.path.join("e2e-test", "api", "files", "blank.txt")
diff --git a/engine/e2e-test/api/hardware/test_api_get_hardware.py b/engine/e2e-test/api/hardware/test_api_get_hardware.py
index 59b15ac18..0efecdbdc 100644
--- a/engine/e2e-test/api/hardware/test_api_get_hardware.py
+++ b/engine/e2e-test/api/hardware/test_api_get_hardware.py
@@ -88,25 +88,6 @@ def test_api_get_hardware_successfully(self):
"example": True,
"description": "Indicates if the GPU is currently activated."
},
- "additional_information": {
- "type": "object",
- "properties": {
- "compute_cap": {
- "type": "string",
- "example": "8.6",
- "description": "The compute capability of the GPU."
- },
- "driver_version": {
- "type": "string",
- "example": "535.183",
- "description": "The version of the installed driver."
- }
- },
- "required": [
- "compute_cap",
- "driver_version"
- ]
- },
"free_vram": {
"type": "integer",
"example": 23983,
@@ -140,7 +121,6 @@ def test_api_get_hardware_successfully(self):
},
"required": [
"activated",
- "additional_information",
"free_vram",
"id",
"name",
diff --git a/engine/e2e-test/api/model/test_api_model.py b/engine/e2e-test/api/model/test_api_model.py
index bacf7e1b0..f370b1daa 100644
--- a/engine/e2e-test/api/model/test_api_model.py
+++ b/engine/e2e-test/api/model/test_api_model.py
@@ -1,6 +1,7 @@
import pytest
import requests
import time
+import platform
from utils.test_runner import (
run,
start_server,
@@ -95,6 +96,7 @@ async def test_models_start_stop_should_be_successful(self):
time.sleep(30)
print("Pull model")
+ requests.delete("http://localhost:3928/v1/models/tinyllama:1b")
json_body = {"model": "tinyllama:1b"}
response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
@@ -110,16 +112,18 @@ async def test_models_start_stop_should_be_successful(self):
response = requests.get("http://localhost:3928/v1/models")
assert response.status_code == 200
- print("Start model")
- json_body = {"model": "tinyllama:1b"}
- response = requests.post(
- "http://localhost:3928/v1/models/start", json=json_body
- )
- assert response.status_code == 200, f"status_code: {response.status_code}"
+ # Skip tests for linux arm
+ if platform.machine() != "aarch64":
+ print("Start model")
+ json_body = {"model": "tinyllama:1b"}
+ response = requests.post(
+ "http://localhost:3928/v1/models/start", json=json_body
+ )
+ assert response.status_code == 200, f"status_code: {response.status_code}"
- print("Stop model")
- response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
- assert response.status_code == 200, f"status_code: {response.status_code}"
+ print("Stop model")
+ response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
+ assert response.status_code == 200, f"status_code: {response.status_code}"
# update API
print("Update model")
diff --git a/engine/e2e-test/cli/engines/test_cli_engine_install.py b/engine/e2e-test/cli/engines/test_cli_engine_install.py
index 370ebe3f3..5d520ce8b 100644
--- a/engine/e2e-test/cli/engines/test_cli_engine_install.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_install.py
@@ -31,25 +31,9 @@ def test_engines_install_llamacpp_should_be_successfully(self):
assert len(response.json()) > 0
assert exit_code == 0, f"Install engine failed with error: {error}"
- @pytest.mark.skipif(reason="Ignore onnx-runtime test")
- def test_engines_install_onnx_on_macos_should_be_failed(self):
- exit_code, output, error = run(
- "Install Engine", ["engines", "install", "onnxruntime"]
- )
- assert "is not supported on" in output, "Should display error message"
- assert exit_code == 0, f"Install engine failed with error: {error}"
-
- @pytest.mark.skipif(reason="Ignore tensorrt-llm test")
- def test_engines_install_onnx_on_tensorrt_should_be_failed(self):
- exit_code, output, error = run(
- "Install Engine", ["engines", "install", "tensorrt-llm"]
- )
- assert "is not supported on" in output, "Should display error message"
- assert exit_code == 0, f"Install engine failed with error: {error}"
-
@pytest.mark.skipif(platform.system() == "Windows", reason="Progress bar log issue on Windows")
def test_engines_install_pre_release_llamacpp(self):
- engine_version = "v0.1.43"
+ engine_version = "b4932"
exit_code, output, error = run(
"Install Engine",
["engines", "install", "llama-cpp", "-v", engine_version],
diff --git a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
index 8672110e2..3198c81a5 100644
--- a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
@@ -24,7 +24,10 @@ def setup_and_teardown(self):
@pytest.mark.asyncio
async def test_engines_uninstall_llamacpp_should_be_successfully(self):
- response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+ data = {"version": "b5371"}
+ response = requests.post(
+ "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+ )
await wait_for_websocket_download_success_event(timeout=None)
exit_code, output, error = run(
"Uninstall engine", ["engines", "uninstall", "llama-cpp"]
diff --git a/engine/e2e-test/cli/model/test_cli_model.py b/engine/e2e-test/cli/model/test_cli_model.py
index aa6e99e4a..cd80a9e2b 100644
--- a/engine/e2e-test/cli/model/test_cli_model.py
+++ b/engine/e2e-test/cli/model/test_cli_model.py
@@ -36,6 +36,7 @@ def setup_and_teardown(self):
run("Delete model", ["models", "delete", "tinyllama:1b"])
stop_server()
+ @pytest.mark.skipif(platform.system() == "Windows", reason="Skip test for Windows")
def test_model_pull_with_direct_url_should_be_success(self):
exit_code, output, error = run(
"Pull model",
diff --git a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
index 9fc296d60..ea3cae242 100644
--- a/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
+++ b/engine/e2e-test/runner/cortex-llamacpp-e2e-nightly.py
@@ -21,7 +21,7 @@
from api.engines.test_api_get_default_engine import TestApiDefaultEngine
from api.engines.test_api_get_engine_release import TestApiEngineRelease
from api.engines.test_api_get_engine_release_latest import TestApiEngineReleaseLatest
-from test_api_post_default_engine import TestApiSetDefaultEngine
+from api.engines.test_api_post_default_engine import TestApiSetDefaultEngine
from api.model.test_api_model import TestApiModel
from api.model.test_api_model_import import TestApiModelImport
from api.files.test_api_create_file import TestApiCreateFile
diff --git a/engine/e2e-test/runner/main.py b/engine/e2e-test/runner/main.py
index 49bdc5131..8a98d0ca3 100644
--- a/engine/e2e-test/runner/main.py
+++ b/engine/e2e-test/runner/main.py
@@ -21,7 +21,7 @@
from api.engines.test_api_get_default_engine import TestApiDefaultEngine
from api.engines.test_api_get_engine_release import TestApiEngineRelease
from api.engines.test_api_get_engine_release_latest import TestApiEngineReleaseLatest
-from test_api_post_default_engine import TestApiSetDefaultEngine
+from api.engines.test_api_post_default_engine import TestApiSetDefaultEngine
from api.model.test_api_model import TestApiModel
from api.model.test_api_model_import import TestApiModelImport
from api.files.test_api_create_file import TestApiCreateFile
diff --git a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
index 7a3c2e232..a22000d93 100644
--- a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
+++ b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py
@@ -125,7 +125,7 @@ async def test_models_on_cortexso_hub(self, model_url):
"Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture = False
)
root = Path.home()
- assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt")
+ assert os.path.exists(root / "cortexcpp" / "engines" / "llama.cpp" / "version.txt")
assert exit_code == 0, f"Install engine failed with error: {error}"
# Start the model
diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc
new file mode 100644
index 000000000..74bf0d1b8
--- /dev/null
+++ b/engine/extensions/local-engine/local_engine.cc
@@ -0,0 +1,1087 @@
+#include "local_engine.h"
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <limits>
+#include <optional>
+#include <random>
+#include <sstream>
+#include <thread>
+#include <unordered_set>
+#include "utils/curl_utils.h"
+#include "utils/json_helper.h"
+#include "utils/logging_utils.h"
+#include "utils/process/utils.h"
+#include "utils/url_parser.h"
+
+namespace cortex::local {
+
+namespace {
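+// Model-config keys that must not be forwarded to llama-server as-is, and a
+// translation table from cortex config keys to llama-server CLI flags.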
+const std::unordered_set<std::string> kIgnoredParams = {
+ "model", "model_alias", "embedding", "ai_prompt",
+ "ai_template", "prompt_template", "mmproj", "system_prompt",
+ "created", "stream", "name", "os",
+ "owned_by", "files", "gpu_arch", "quantization_method",
+ "engine", "system_template", "max_tokens", "user_template",
+ "user_prompt", "min_keep", "mirostat", "mirostat_eta",
+ "mirostat_tau", "text_model", "version", "n_probs",
+ "object", "penalize_nl", "precision", "size",
+ "stop", "tfs_z", "typ_p", "caching_enabled"};
+
+const std::unordered_map<std::string, std::string> kParamsMap = {
+ {"cpu_threads", "--threads"},
+ {"n_ubatch", "--ubatch-size"},
+ {"n_batch", "--batch-size"},
+ {"n_parallel", "--parallel"},
+ {"temperature", "--temp"},
+ {"top_k", "--top-k"},
+ {"top_p", "--top-p"},
+ {"min_p", "--min-p"},
+ {"dynatemp_exponent", "--dynatemp-exp"},
+ {"ctx_len", "--ctx-size"},
+ {"ngl", "-ngl"},
+ {"reasoning_budget", "--reasoning-budget"},
+};
+
+int GenerateRandomInteger(int min, int max) {
+ static std::random_device rd; // Seed for the random number engine
+ static std::mt19937 gen(rd()); // Mersenne Twister random number engine
+ std::uniform_int_distribution<> dis(
+ min, max); // Distribution for the desired range
+
+ return dis(gen);
+}
+
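+// Flattens a model load request into a llama-server argv list: keys in
+// kParamsMap are translated to their CLI flags, keys in kIgnoredParams are
+// dropped, a handful of keys (model_path, cache_type, use_mmap, ...) get
+// special handling, and any remaining key becomes a generic "--<key> <value>".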
+std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
+ std::vector res;
+ std::string errors;
+ res.push_back("--no-webui");
+
+ for (const auto& member : root.getMemberNames()) {
+ if (member == "model_path" || member == "llama_model_path") {
+ if (!root[member].isNull()) {
+ const std::string path = root[member].asString();
+ res.push_back("--model");
+ res.push_back(path);
+
+ // If path contains both "Jan" and "nano", case-insensitive, add special params
+ std::string lowered = path;
+ std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) {
+ return std::tolower(c);
+ });
+ }
+ continue;
+ } else if (kIgnoredParams.find(member) != kIgnoredParams.end()) {
+ continue;
+ } else if (kParamsMap.find(member) != kParamsMap.end()) {
+ res.push_back(kParamsMap.at(member));
+ res.push_back(root[member].asString());
+ continue;
+ } else if (member == "model_type") {
+ if (root[member].asString() == "embedding") {
+ res.push_back("--embedding");
+ }
+ continue;
+ } else if (member == "cache_type") {
+ if (!root[member].isNull()) {
+ res.push_back("-ctk");
+ res.push_back(root[member].asString());
+ res.push_back("-ctv");
+ res.push_back(root[member].asString());
+ }
+ continue;
+ } else if (member == "use_mmap") {
+ if (!root[member].asBool()) {
+ res.push_back("--no-mmap");
+ }
+ continue;
+ } else if (member == "ignore_eos") {
+ if (root[member].asBool()) {
+ res.push_back("--ignore_eos");
+ }
+ continue;
+ } else if (member == "ctx_len") {
+ if (!root[member].isNull()) {
+ res.push_back("--ctx-size");
+ res.push_back(root[member].asString());
+ }
+ continue;
+ }
+
+ // Generic handling for other members
+ res.push_back("--" + member);
+ if (root[member].isString()) {
+ res.push_back(root[member].asString());
+ } else if (root[member].isInt()) {
+ res.push_back(std::to_string(root[member].asInt()));
+ } else if (root[member].isDouble()) {
+ res.push_back(std::to_string(root[member].asDouble()));
+ } else if (root[member].isArray()) {
+ std::stringstream ss;
+ ss << "[";
+ bool first = true;
+ for (const auto& value : root[member]) {
+ if (!first) {
+ ss << ", ";
+ }
+ ss << "\"" << value.asString() << "\"";
+ first = false;
+ }
+ ss << "]";
+ res.push_back(ss.str());
+ }
+ }
+
+ return res;
+}
+
+
+constexpr const auto kMinDataChunkSize = 6u;
+
+struct OaiInfo {
+ std::string model;
+ bool include_usage = false;
+ bool oai_endpoint = false;
+ int n_probs = 0;
+};
+
+struct StreamingCallback {
+ std::shared_ptr<http_callback> callback;
+ bool need_stop = true;
+ OaiInfo oi;
+};
+
+struct Usage {
+ int prompt_tokens = 0;
+ int completion_tokens = 0;
+};
+
+std::string GenerateRandomString(std::size_t length) {
+ const std::string characters =
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+ std::random_device rd;
+ std::mt19937 generator(rd());
+
+ std::uniform_int_distribution<> distribution(
+ 0, static_cast<int>(characters.size()) - 1);
+
+ std::string random_string(length, '\0');
+ std::generate_n(random_string.begin(), length,
+ [&]() { return characters[distribution(generator)]; });
+
+ return random_string;
+}
+
+std::vector<int> GetUTF8Bytes(const std::string& str) {
+ std::vector<int> bytes;
+ for (unsigned char c : str) {
+ bytes.push_back(static_cast<int>(c));
+ }
+ return bytes;
+}
+
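+// Converts llama.cpp "completion_probabilities" output into the OpenAI
+// logprobs shape (token, logprob, bytes, top_logprobs).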
+Json::Value TransformLogProbs(const Json::Value& logprobs) {
+ Json::Value root;
+ Json::Value logprobs_json(Json::arrayValue);
+
+ // Iterate through each token group in the input
+ for (const auto& token_group : logprobs) {
+ Json::Value content_item;
+
+ // Set the token (content)
+ content_item["token"] = token_group["content"].asString();
+
+ // Get the probabilities array
+ const auto& probs = token_group["probs"];
+
+ // Set the main token's logprob (first probability)
+ if (!probs.empty()) {
+ content_item["logprob"] = std::log(
+ probs[0]["prob"].asDouble() + std::numeric_limits<double>::epsilon());
+ }
+
+ // Get UTF-8 bytes for the token
+ auto bytes = GetUTF8Bytes(token_group["content"].asString());
+ Json::Value bytes_array(Json::arrayValue);
+ for (int byte : bytes) {
+ bytes_array.append(byte);
+ }
+ content_item["bytes"] = bytes_array;
+
+ // Create top_logprobs array
+ Json::Value top_logprobs(Json::arrayValue);
+ for (const auto& prob_item : probs) {
+ Json::Value logprob_item;
+ logprob_item["token"] = prob_item["tok_str"].asString();
+ logprob_item["logprob"] =
+ std::log(prob_item["prob"].asDouble() +
+ std::numeric_limits<double>::epsilon());
+
+ // Get UTF-8 bytes for this alternative token
+ auto alt_bytes = GetUTF8Bytes(prob_item["tok_str"].asString());
+ Json::Value alt_bytes_array(Json::arrayValue);
+ for (int byte : alt_bytes) {
+ alt_bytes_array.append(byte);
+ }
+ logprob_item["bytes"] = alt_bytes_array;
+
+ top_logprobs.append(logprob_item);
+ }
+ content_item["top_logprobs"] = top_logprobs;
+
+ logprobs_json.append(content_item);
+ }
+ root["content"] = logprobs_json;
+ return root;
+}
+
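+// Builds one OpenAI-style "chat.completion.chunk" payload for streaming; when
+// a usage block is attached, the choices array is left empty.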
+std::string CreateReturnJson(
+ const std::string& id, const std::string& model, const std::string& content,
+ Json::Value finish_reason, bool include_usage,
+ std::optional<Usage> usage = std::nullopt,
+ std::optional<Json::Value> logprobs = std::nullopt) {
+ Json::Value root;
+
+ root["id"] = id;
+ root["model"] = model;
+ root["created"] = static_cast(std::time(nullptr));
+ root["object"] = "chat.completion.chunk";
+
+ Json::Value choicesArray(Json::arrayValue);
+ // If usage, the choices field will always be an empty array
+ if (!usage) {
+ Json::Value choice;
+
+ choice["index"] = 0;
+ Json::Value delta;
+ delta["content"] = content;
+ delta["role"] = "assistant";
+ choice["delta"] = delta;
+ choice["finish_reason"] = finish_reason;
+ if (logprobs.has_value() && !logprobs.value().empty()) {
+ choice["logprobs"] = TransformLogProbs(logprobs.value());
+ }
+
+ choicesArray.append(choice);
+ }
+ root["choices"] = choicesArray;
+ if (include_usage) {
+ if (usage) {
+ Json::Value usage_json;
+ Json::Value details;
+ details["reasoning_tokens"] = 0;
+ usage_json["prompt_tokens"] = (*usage).prompt_tokens;
+ usage_json["completion_tokens"] = (*usage).completion_tokens;
+ usage_json["total_tokens"] =
+ (*usage).prompt_tokens + (*usage).completion_tokens;
+ usage_json["completion_tokens_details"] = details;
+ root["usage"] = usage_json;
+ } else {
+ root["usage"] = Json::Value();
+ }
+ }
+
+ Json::StreamWriterBuilder writer;
+ writer["indentation"] = ""; // This sets the indentation to an empty string,
+ // producing compact output.
+ return Json::writeString(writer, root);
+}
+
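+// CURL write callback for streaming responses: strips the leading "data: "
+// prefix from each SSE chunk sent by llama-server and forwards it to the
+// engine callback, either unchanged (OpenAI-compatible endpoint) or re-wrapped
+// through CreateReturnJson (plain /v1/completions endpoint).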
+size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
+ auto* sc = static_cast<StreamingCallback*>(userdata);
+ size_t data_length = size * nmemb;
+
+ if (ptr && data_length > kMinDataChunkSize) {
+ std::string chunk(ptr + kMinDataChunkSize, data_length - kMinDataChunkSize);
+ CTL_DBG(chunk);
+ if (sc->oi.oai_endpoint) {
+ if (chunk.find("[DONE]") != std::string::npos) {
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = true;
+ status["status_code"] = 200;
+ Json::Value chunk_json;
+ chunk_json["data"] = "data: [DONE]";
+ sc->need_stop = false;
+ (*sc->callback)(std::move(status), std::move(chunk_json));
+ return data_length;
+ }
+ if (!sc->oi.include_usage &&
+ chunk.find("completion_tokens") != std::string::npos) {
+ return data_length;
+ }
+
+ Json::Value chunk_json;
+ chunk_json["data"] = "data: " + chunk;
+ Json::Value status;
+ status["is_done"] = false;
+ status["has_error"] = false;
+ status["is_stream"] = true;
+ status["status_code"] = 200;
+ (*sc->callback)(std::move(status), std::move(chunk_json));
+ } else {
+ if (chunk.find("[DONE]") != std::string::npos) {
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = true;
+ status["status_code"] = 200;
+ Json::Value chunk_json;
+ chunk_json["data"] = "data: [DONE]";
+ sc->need_stop = false;
+ (*sc->callback)(std::move(status), std::move(chunk_json));
+ return data_length;
+ }
+ auto json_data = json_helper::ParseJsonString(chunk);
+ // DONE
+ if (!json_data.isNull() && json_data.isMember("timings")) {
+ std::optional<Usage> u;
+ if (sc->oi.include_usage) {
+ u = Usage{json_data["tokens_evaluated"].asInt(),
+ json_data["tokens_predicted"].asInt()};
+ }
+
+ Json::Value chunk_json;
+ chunk_json["data"] =
+ "data: " + CreateReturnJson(GenerateRandomString(20), sc->oi.model,
+ "", "stop", sc->oi.include_usage, u);
+ Json::Value status;
+ status["is_done"] = false;
+ status["has_error"] = false;
+ status["is_stream"] = true;
+ status["status_code"] = 200;
+ (*sc->callback)(std::move(status), std::move(chunk_json));
+
+ sc->need_stop = false;
+ return data_length;
+ }
+
+ Json::Value logprobs;
+ if (sc->oi.n_probs > 0) {
+ logprobs = json_data["completion_probabilities"];
+ }
+ std::string to_send;
+ if (json_data.isMember("choices") && json_data["choices"].isArray() &&
+ json_data["choices"].size() > 0) {
+ to_send = json_data["choices"][0].get("text", "").asString();
+ }
+ CTL_DBG(to_send);
+ const std::string str =
+ CreateReturnJson(GenerateRandomString(20), sc->oi.model, to_send, "",
+ sc->oi.include_usage, std::nullopt, logprobs);
+ Json::Value chunk_json;
+ chunk_json["data"] = "data: " + str;
+ Json::Value status;
+ status["is_done"] = false;
+ status["has_error"] = false;
+ status["is_stream"] = true;
+ status["status_code"] = 200;
+ (*sc->callback)(std::move(status), std::move(chunk_json));
+ return data_length;
+ }
+ }
+
+ return data_length;
+}
+
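+// Converts an OpenAI-style logit_bias object ({"<token_id>": bias}) into the
+// [[token_id, bias], ...] array form expected by llama-server.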
+Json::Value ConvertLogitBiasToArray(const Json::Value& input) {
+ Json::Value result(Json::arrayValue);
+ if (input.isObject()) {
+ const auto& member_names = input.getMemberNames();
+ for (const auto& tokenStr : member_names) {
+ Json::Value pair(Json::arrayValue);
+ pair.append(std::stoi(tokenStr));
+ pair.append(input[tokenStr].asFloat());
+ result.append(pair);
+ }
+ }
+ return result;
+}
+
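+// Builds a complete, non-streaming OpenAI-style "chat.completion" response.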
+Json::Value CreateFullReturnJson(
+ const std::string& id, const std::string& model, const std::string& content,
+ const std::string& system_fingerprint, int prompt_tokens,
+ int completion_tokens, Json::Value finish_reason = Json::Value(),
+ std::optional<Json::Value> logprobs = std::nullopt) {
+ Json::Value root;
+
+ root["id"] = id;
+ root["model"] = model;
+ root["created"] = static_cast(std::time(nullptr));
+ root["object"] = "chat.completion";
+ root["system_fingerprint"] = system_fingerprint;
+
+ Json::Value choicesArray(Json::arrayValue);
+ Json::Value choice;
+
+ choice["index"] = 0;
+ Json::Value message;
+ message["role"] = "assistant";
+ message["content"] = content;
+ choice["message"] = message;
+ choice["finish_reason"] = finish_reason;
+ if (logprobs.has_value() && !logprobs.value().empty()) {
+ choice["logprobs"] = TransformLogProbs(logprobs.value());
+ }
+
+ choicesArray.append(choice);
+ root["choices"] = choicesArray;
+
+ Json::Value usage;
+ usage["prompt_tokens"] = prompt_tokens;
+ usage["completion_tokens"] = completion_tokens;
+ usage["total_tokens"] = prompt_tokens + completion_tokens;
+ root["usage"] = usage;
+
+ return root;
+}
+
+} // namespace
+
+LocalEngine::~LocalEngine() {
+ for (auto& [_, si] : server_map_) {
+ (void)cortex::process::KillProcess(si.process_info);
+ }
+ server_map_.clear();
+}
+void LocalEngine::HandleChatCompletion(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) {
+ auto model_id = json_body->get("model", "").asString();
+ if (model_id.empty()) {
+ CTL_WRN("Model is empty");
+ }
+ if (server_map_.find(model_id) != server_map_.end()) {
+ auto& s = server_map_[model_id];
+ auto oaicompat = [&json_body]() -> bool {
+ if (json_body->isMember("logprobs") &&
+ (*json_body)["logprobs"].asBool()) {
+ return false;
+ }
+ return true;
+ }();
+ if (oaicompat) {
+ HandleOpenAiChatCompletion(
+ json_body, const_cast<http_callback&&>(callback), model_id);
+ } else {
+ HandleNonOpenAiChatCompletion(
+ json_body, const_cast<http_callback&&>(callback), model_id);
+ }
+ } else {
+ Json::Value error;
+ error["error"] = "Model is not loaded yet: " + model_id;
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 400;
+ callback(std::move(status), std::move(error));
+ }
+}
+
+void LocalEngine::HandleEmbedding(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) {
+ auto model_id = json_body->get("model", "").asString();
+ if (model_id.empty()) {
+ CTL_WRN("Model is empty");
+ }
+ if (server_map_.find(model_id) != server_map_.end()) {
+ auto& s = server_map_[model_id];
+ auto url = url_parser::Url{
+ /*.protocol*/ "http",
+ /*.host*/ s.host + ":" + std::to_string(s.port),
+ /*.pathParams*/ {"v1", "embeddings"},
+ /* .queries = */ {},
+ };
+
+ auto response = curl_utils::SimplePostJson(url.ToFullPath(),
+ json_body->toStyledString());
+
+ if (response.has_error()) {
+ CTL_WRN("Error: " << response.error());
+ Json::Value error;
+ error["error"] = response.error();
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 400;
+ callback(std::move(status), std::move(error));
+ } else {
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = false;
+ status["status_code"] = 200;
+ callback(std::move(status), std::move(response.value()));
+ }
+ } else {
+ Json::Value error;
+ error["error"] = "Model is not loaded yet: " + model_id;
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 400;
+ callback(std::move(status), std::move(error));
+ }
+}
+
+void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) {
+ auto model_id = json_body->get("model", "").asString();
+ if (model_id.empty()) {
+ CTL_WRN("Model is empty");
+ }
+ if (server_map_.find(model_id) != server_map_.end()) {
+ CTL_INF("Model " << model_id << " is already loaded");
+ Json::Value error;
+ error["error"] = "Model " + model_id + " is already loaded";
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 409;
+ callback(std::move(status), std::move(error));
+ return;
+ }
+
+ CTL_INF("Start loading model");
+ auto wait_for_server_up = [this](const std::string& model,
+ const std::string& host, int port) {
+ auto url = url_parser::Url{
+ /*.protocol*/ "http",
+ /*.host*/ host + ":" + std::to_string(port),
+ /*.pathParams*/ {"health"},
+ /*.queries*/ {},
+ };
+ while (server_map_.find(model) != server_map_.end()) {
+ auto res = curl_utils::SimpleGet(url.ToFullPath());
+ if (res.has_error()) {
+ LOG_INFO << "Wait for server up ..";
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+ } else {
+ return true;
+ }
+ }
+ return false;
+ };
+
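+ // Spawn llama-server on a random local port and poll its /health endpoint
+ // until it responds before reporting the model as loaded.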
+ LOG_DEBUG << "Start to spawn llama-server";
+
+ server_map_[model_id].host = "127.0.0.1";
+ server_map_[model_id].port = GenerateRandomInteger(39400, 39999);
+ auto& s = server_map_[model_id];
+ s.pre_prompt = json_body->get("pre_prompt", "").asString();
+ s.user_prompt = json_body->get("user_prompt", "USER: ").asString();
+ s.ai_prompt = json_body->get("ai_prompt", "ASSISTANT: ").asString();
+ s.system_prompt =
+ json_body->get("system_prompt", "ASSISTANT's RULE: ").asString();
+ std::vector<std::string> params = ConvertJsonToParamsVector(*json_body);
+ params.push_back("--host");
+ params.push_back(s.host);
+ params.push_back("--port");
+ params.push_back(std::to_string(s.port));
+
+
+ params.push_back("--jinja");
+
+ std::vector<std::string> v;
+ v.reserve(params.size() + 1);
+ auto engine_dir = engine_service_.GetEngineDirPath(kLlamaRepo);
+ if (engine_dir.has_error()) {
+ CTL_WRN(engine_dir.error());
+ server_map_.erase(model_id);
+ return;
+ }
+ auto exe = (engine_dir.value().first / kLlamaServer).string();
+
+ v.push_back(exe);
+ v.insert(v.end(), params.begin(), params.end());
+ engine_service_.RegisterEngineLibPath();
+
+ auto log_path =
+ (file_manager_utils::GetCortexLogPath() / "logs" / "cortex.log").string();
+ CTL_DBG("log: " << log_path);
+ auto result = cortex::process::SpawnProcess(v, log_path, log_path);
+ if (result.has_error()) {
+ CTL_ERR("Fail to spawn process. " << result.error());
+ Json::Value error;
+ error["error"] = "Fail to spawn process";
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 500;
+ callback(std::move(status), std::move(error));
+ server_map_.erase(model_id);
+ return;
+ }
+
+ s.process_info = result.value();
+ if (wait_for_server_up(model_id, s.host, s.port)) {
+ s.start_time = std::chrono::system_clock::now().time_since_epoch() /
+ std::chrono::milliseconds(1);
+ Json::Value response;
+ response["status"] = "Model loaded successfully with pid: " +
+ std::to_string(s.process_info.pid);
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = false;
+ status["status_code"] = 200;
+ callback(std::move(status), std::move(response));
+ } else {
+ server_map_.erase(model_id);
+ Json::Value error;
+ error["error"] = "Wait for server up timeout";
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 500;
+ callback(std::move(status), std::move(error));
+ }
+}
+
+void LocalEngine::UnloadModel(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) {
+ auto model_id = json_body->get("model", "").asString();
+ if (model_id.empty()) {
+ CTL_WRN("Model is empty");
+ }
+
+ if (server_map_.find(model_id) != server_map_.end()) {
+ auto& s = server_map_[model_id];
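+ // On Windows the child process is terminated via KillProcess; elsewhere a
+ // SIGTERM is sent so llama-server can shut down cleanly.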
+#if defined(_WIN32) || defined(_WIN64)
+ auto sent = cortex::process::KillProcess(s.process_info);
+#else
+ auto sent = (kill(s.process_info.pid, SIGTERM) != -1);
+#endif
+ if (sent) {
+ LOG_INFO << "Termination signal sent to child process";
+ Json::Value response;
+ response["status"] = "Model unloaded successfully with pid: " +
+ std::to_string(s.process_info.pid);
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = false;
+ status["status_code"] = 200;
+ callback(std::move(status), std::move(response));
+ server_map_.erase(model_id);
+ } else {
+ LOG_ERROR << "Failed to send termination signal to child process";
+ Json::Value error;
+ error["error"] = "Failed to unload model: " + model_id;
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 500;
+ callback(std::move(status), std::move(error));
+ }
+ } else {
+ Json::Value error;
+ error["error"] = "Model is not loaded yet: " + model_id;
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 400;
+ callback(std::move(status), std::move(error));
+ }
+}
+
+void LocalEngine::GetModelStatus(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) {
+ auto model_id = json_body->get("model", "").asString();
+ if (model_id.empty()) {
+ CTL_WRN("Model is empty");
+ }
+ if (server_map_.find(model_id) != server_map_.end()) {
+ Json::Value response;
+ response["status"] = "Model is loaded";
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = false;
+ status["status_code"] = 200;
+ callback(std::move(status), std::move(response));
+ } else {
+ Json::Value error;
+ error["error"] = "Model is not loaded yet: " + model_id;
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 400;
+ callback(std::move(status), std::move(error));
+ }
+}
+
+void LocalEngine::GetModels(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) {
+ Json::Value json_resp;
+ Json::Value model_array(Json::arrayValue);
+ {
+ for (const auto& [m, s] : server_map_) {
+ Json::Value val;
+ val["id"] = m;
+ val["engine"] = kLlamaEngine;
+ val["start_time"] = s.start_time;
+ val["model_size"] = 0u;
+ val["vram"] = 0u;
+ val["ram"] = 0u;
+ val["object"] = "model";
+ model_array.append(val);
+ }
+ }
+
+ json_resp["object"] = "list";
+ json_resp["data"] = model_array;
+
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = false;
+ status["status_code"] = 200;
+ callback(std::move(status), std::move(json_resp));
+ CTL_INF("Running models responded");
+ (void)json_body;
+}
+
+void LocalEngine::HandleOpenAiChatCompletion(
+ std::shared_ptr<Json::Value> json_body, http_callback&& callback,
+ const std::string& model) {
+ CTL_DBG("Hanle OpenAI chat completion");
+ auto is_stream = (*json_body).get("stream", false).asBool();
+ auto include_usage = [&json_body, is_stream]() -> bool {
+ if (is_stream) {
+ if (json_body->isMember("stream_options") &&
+ !(*json_body)["stream_options"].isNull()) {
+ return (*json_body)["stream_options"]
+ .get("include_usage", false)
+ .asBool();
+ }
+ return false;
+ }
+ return false;
+ }();
+
+ auto n = [&json_body, is_stream]() -> int {
+ if (is_stream)
+ return 1;
+ return (*json_body).get("n", 1).asInt();
+ }();
+
+ auto& s = server_map_.at(model);
+ // Format logit_bias
+ if (json_body->isMember("logit_bias")) {
+ auto logit_bias = ConvertLogitBiasToArray((*json_body)["logit_bias"]);
+ (*json_body)["logit_bias"] = logit_bias;
+ }
+ // llama.cpp server only supports n = 1
+ (*json_body)["n"] = 1;
+
+ auto url = url_parser::Url{
+ /*.protocol*/ "http",
+ /*.host*/ s.host + ":" + std::to_string(s.port),
+ /*.pathParams*/ {"v1", "chat", "completions"},
+ /*.queries*/ {},
+ };
+
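+ // Streaming requests are proxied through CURL on the background task queue;
+ // non-streaming requests loop over the requested n choices and merge the
+ // usage counters into a single response.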
+ if (is_stream) {
+ q_.RunInQueue([s, json_body, callback, model, url = std::move(url)] {
+ auto curl = curl_easy_init();
+ if (!curl) {
+ CTL_WRN("Failed to initialize CURL");
+ return;
+ }
+
+ curl_easy_setopt(curl, CURLOPT_URL, url.ToFullPath().c_str());
+ curl_easy_setopt(curl, CURLOPT_POST, 1L);
+ CTL_INF(url.ToFullPath());
+
+ struct curl_slist* headers = nullptr;
+ headers = curl_slist_append(headers, "Content-Type: application/json");
+ curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+ auto json_str = json_body->toStyledString();
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDS, json_str.c_str());
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, json_str.length());
+ curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L);
+
+ StreamingCallback sc;
+ OaiInfo oi{model, false /*include_usage*/, true /*oai_endpoint*/,
+ 0 /*n_probs*/};
+ sc.callback = std::make_shared<http_callback>(callback);
+ sc.need_stop = true;
+ sc.oi = oi;
+
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, &sc);
+ auto res = curl_easy_perform(curl);
+
+ if (res != CURLE_OK) {
+ CTL_WRN("CURL request failed: " << curl_easy_strerror(res));
+
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = true;
+ status["status_code"] = 500;
+
+ Json::Value error;
+ error["error"] = curl_easy_strerror(res);
+ callback(std::move(status), std::move(error));
+ }
+ curl_easy_cleanup(curl);
+ if (sc.need_stop) {
+ CTL_DBG("No stop message received, need to stop");
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = true;
+ status["status_code"] = 200;
+ (*sc.callback)(std::move(status), Json::Value());
+ }
+ });
+
+ } else {
+ Json::Value result;
+ // multiple choices
+ for (int i = 0; i < n; i++) {
+ auto response = curl_utils::SimplePostJson(url.ToFullPath(),
+ json_body->toStyledString());
+
+ if (response.has_value()) {
+ auto r = response.value();
+ if (i == 0) {
+ result = r;
+ } else {
+ r["choices"][0]["index"] = i;
+ result["choices"].append(r["choices"][0]);
+ result["usage"]["completion_tokens"] =
+ result["usage"]["completion_tokens"].asInt() +
+ r["usage"]["completion_tokens"].asInt();
+ result["usage"]["prompt_tokens"] =
+ result["usage"]["prompt_tokens"].asInt() +
+ r["usage"]["prompt_tokens"].asInt();
+ result["usage"]["total_tokens"] =
+ result["usage"]["total_tokens"].asInt() +
+ r["usage"]["total_tokens"].asInt();
+ }
+
+ if (i == n - 1) {
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = false;
+ status["status_code"] = 200;
+ callback(std::move(status), std::move(result));
+ }
+ } else {
+ CTL_WRN("Error: " << response.error());
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 500;
+ Json::Value error;
+ error["error"] = response.error();
+ callback(std::move(status), std::move(error));
+ break;
+ }
+ }
+ }
+}
+
+// (sang) duplicated code, but it will be easier to clean up once
+// llama-server upstream is fully OpenAI API compatible
+void LocalEngine::HandleNonOpenAiChatCompletion(
+ std::shared_ptr<Json::Value> json_body, http_callback&& callback,
+ const std::string& model) {
+ CTL_DBG("Hanle NonOpenAI chat completion");
+ auto is_stream = (*json_body).get("stream", false).asBool();
+ auto include_usage = [&json_body, is_stream]() -> bool {
+ if (is_stream) {
+ if (json_body->isMember("stream_options") &&
+ !(*json_body)["stream_options"].isNull()) {
+ return (*json_body)["stream_options"]
+ .get("include_usage", false)
+ .asBool();
+ }
+ return false;
+ }
+ return false;
+ }();
+
+ auto n = [&json_body, is_stream]() -> int {
+ if (is_stream)
+ return 1;
+ return (*json_body).get("n", 1).asInt();
+ }();
+
+ auto& s = server_map_.at(model);
+
+ // Format logit_bias
+ if (json_body->isMember("logit_bias")) {
+ auto logit_bias = ConvertLogitBiasToArray((*json_body)["logit_bias"]);
+ (*json_body)["logit_bias"] = logit_bias;
+ }
+ auto get_message = [](const Json::Value& msg_content) -> std::string {
+ if (msg_content.isArray()) {
+ for (const auto& mc : msg_content) {
+ if (mc["type"].asString() == "text") {
+ return mc["text"].asString();
+ }
+ }
+ } else {
+ return msg_content.asString();
+ }
+ return "";
+ };
+
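+ // Without an explicit prompt, build one from the chat messages using the
+ // role prefixes captured when the model was loaded.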
+ if (!json_body->isMember("prompt") ||
+ (*json_body)["prompt"].asString().empty()) {
+ auto formatted_output = s.pre_prompt;
+ for (const auto& message : (*json_body)["messages"]) {
+ auto input_role = message["role"].asString();
+ std::string role;
+ if (input_role == "user") {
+ role = s.user_prompt;
+ } else if (input_role == "assistant") {
+ role = s.ai_prompt;
+ } else if (input_role == "system") {
+ role = s.system_prompt;
+ } else {
+ role = input_role;
+ }
+
+ if (auto content = get_message(message["content"]); !content.empty()) {
+ formatted_output += role + content;
+ }
+ }
+ formatted_output += s.ai_prompt;
+ (*json_body)["prompt"] = formatted_output;
+ }
+
+ (*json_body)["n"] = 1;
+ int n_probs = json_body->get("n_probs", 0).asInt();
+
+ auto url = url_parser::Url{
+ /*.protocol*/ "http",
+ /*.host*/ s.host + ":" + std::to_string(s.port),
+ /*.pathParams*/ {"v1", "completions"},
+ /*.queries*/ {},
+ };
+
+ if (is_stream) {
+ q_.RunInQueue([s, json_body, callback, n_probs, model,
+ url = std::move(url)] {
+ auto curl = curl_easy_init();
+ if (!curl) {
+ CTL_WRN("Failed to initialize CURL");
+ return;
+ }
+
+ curl_easy_setopt(curl, CURLOPT_URL, url.ToFullPath().c_str());
+ curl_easy_setopt(curl, CURLOPT_POST, 1L);
+
+ struct curl_slist* headers = nullptr;
+ headers = curl_slist_append(headers, "Content-Type: application/json");
+ curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+ auto json_str = json_body->toStyledString();
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDS, json_str.c_str());
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, json_str.length());
+ curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L);
+
+ StreamingCallback sc;
+ OaiInfo oi{model, false /*include_usage*/, false /*oai_endpoint*/,
+ n_probs};
+ sc.callback = std::make_shared<http_callback>(callback);
+ sc.need_stop = true;
+ sc.oi = oi;
+
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, &sc);
+ auto res = curl_easy_perform(curl);
+
+ if (res != CURLE_OK) {
+ CTL_WRN("CURL request failed: " << curl_easy_strerror(res));
+
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = true;
+ status["status_code"] = 500;
+
+ Json::Value error;
+ error["error"] = curl_easy_strerror(res);
+ callback(std::move(status), std::move(error));
+ }
+ curl_easy_cleanup(curl);
+ if (sc.need_stop) {
+ CTL_DBG("No stop message received, need to stop");
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = true;
+ status["status_code"] = 200;
+ (*sc.callback)(std::move(status), Json::Value());
+ }
+ });
+
+ } else {
+
+ Json::Value result;
+ int prompt_tokens = 0;
+ int predicted_tokens = 0;
+ // multiple choices
+ for (int i = 0; i < n; i++) {
+ auto response = curl_utils::SimplePostJson(url.ToFullPath(),
+ json_body->toStyledString());
+ if (response.has_value()) {
+ auto r = response.value();
+ Json::Value logprobs;
+ prompt_tokens += r["tokens_evaluated"].asInt();
+ predicted_tokens += r["tokens_predicted"].asInt();
+ std::string to_send = r["content"].asString();
+ string_utils::LTrim(to_send);
+ if (n_probs > 0) {
+ logprobs = r["completion_probabilities"];
+ }
+ if (i == 0) {
+ result = CreateFullReturnJson(
+ GenerateRandomString(20), model, to_send, "_", prompt_tokens,
+ predicted_tokens, Json::Value("stop"), logprobs);
+ } else {
+ auto choice = CreateFullReturnJson(
+ GenerateRandomString(20), model, to_send, "_", prompt_tokens,
+ predicted_tokens, Json::Value("stop"), logprobs)["choices"][0];
+ choice["index"] = i;
+ result["choices"].append(choice);
+ result["usage"]["completion_tokens"] = predicted_tokens;
+ result["usage"]["prompt_tokens"] = prompt_tokens;
+ result["usage"]["total_tokens"] = predicted_tokens + prompt_tokens;
+ }
+
+ if (i == n - 1) {
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = false;
+ status["is_stream"] = false;
+ status["status_code"] = 200;
+ callback(std::move(status), std::move(result));
+ }
+ } else {
+ CTL_WRN("Error: " << response.error());
+ Json::Value status;
+ status["is_done"] = true;
+ status["has_error"] = true;
+ status["is_stream"] = false;
+ status["status_code"] = 500;
+ Json::Value error;
+ error["error"] = response.error();
+ callback(std::move(status), std::move(error));
+ break;
+ }
+ }
+ }
+}
+
+} // namespace cortex::local
diff --git a/engine/extensions/local-engine/local_engine.h b/engine/extensions/local-engine/local_engine.h
new file mode 100644
index 000000000..6dd970799
--- /dev/null
+++ b/engine/extensions/local-engine/local_engine.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include "cortex-common/EngineI.h"
+#include "json/json.h"
+#include "services/engine_service.h"
+#include "utils/process/utils.h"
+#include "utils/task_queue.h"
+
+namespace cortex::local {
+using http_callback = std::function<void(Json::Value&&, Json::Value&&)>;
+
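+// Bookkeeping for a single spawned llama-server child process serving a model.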
+struct ServerAddress {
+ std::string host;
+ int port;
+ cortex::process::ProcessInfo process_info;
+ std::string pre_prompt;
+ std::string user_prompt;
+ std::string ai_prompt;
+ std::string system_prompt;
+ uint64_t start_time;
+};
+
+class LocalEngine : public EngineI {
+ public:
+ LocalEngine(EngineService& engine_service, TaskQueue& q)
+ : engine_service_(engine_service), q_(q) {}
+ ~LocalEngine();
+
+ void Load(EngineLoadOption opts) final {}
+
+ void Unload(EngineUnloadOption opts) final {}
+
+ void HandleChatCompletion(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) final;
+ void HandleEmbedding(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) final;
+ void LoadModel(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) final;
+ void UnloadModel(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) final;
+ void GetModelStatus(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback) final;
+
+ // Get list of running models
+ void GetModels(std::shared_ptr<Json::Value> jsonBody,
+ http_callback&& callback) final;
+
+ bool SetFileLogger(int max_log_lines, const std::string& log_path) final {
+ return true;
+ }
+ void SetLogLevel(trantor::Logger::LogLevel logLevel) final {}
+
+ // Stop inflight chat completion in stream mode
+ void StopInferencing(const std::string& model_id) final {}
+
+ private:
+ void HandleOpenAiChatCompletion(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback,
+ const std::string& model);
+
+ void HandleNonOpenAiChatCompletion(std::shared_ptr<Json::Value> json_body,
+ http_callback&& callback,
+ const std::string& model);
+
+ private:
+ std::unordered_map<std::string, ServerAddress> server_map_;
+ EngineService& engine_service_;
+ TaskQueue& q_;
+};
+
+} // namespace cortex::local
diff --git a/engine/main.cc b/engine/main.cc
index ab4e74857..abde0441b 100644
--- a/engine/main.cc
+++ b/engine/main.cc
@@ -196,15 +196,16 @@ void RunServer(bool ignore_cout) {
auto config_service = std::make_shared<ConfigService>();
auto download_service =
std::make_shared<DownloadService>(event_queue_ptr, config_service);
+ auto task_queue = std::make_shared<cortex::TaskQueue>(
+ std::min(2u, std::thread::hardware_concurrency()), "background_task");
auto engine_service = std::make_shared<EngineService>(
- download_service, dylib_path_manager, db_service);
+ download_service, dylib_path_manager, db_service, task_queue);
auto inference_svc = std::make_shared<InferenceService>(engine_service);
auto model_src_svc = std::make_shared<ModelSourceService>(db_service);
- cortex::TaskQueue task_queue(
- std::min(2u, std::thread::hardware_concurrency()), "background_task");
- auto model_service =
- std::make_shared<ModelService>(db_service, hw_service, download_service,
- inference_svc, engine_service, task_queue);
+
+ auto model_service = std::make_shared<ModelService>(
+ db_service, hw_service, download_service, inference_svc, engine_service,
+ *task_queue);
inference_svc->SetModelService(model_service);
auto file_watcher_srv = std::make_shared<FileWatcherService>(
diff --git a/engine/repositories/file_fs_repository.cc b/engine/repositories/file_fs_repository.cc
index f5b349f45..67c0981ba 100644
--- a/engine/repositories/file_fs_repository.cc
+++ b/engine/repositories/file_fs_repository.cc
@@ -18,14 +18,10 @@ std::filesystem::path SanitizePath(const std::filesystem::path& user_input,
std::filesystem::path resolved_path = std::filesystem::weakly_canonical(
std::filesystem::path(basedir) / std::filesystem::path(user_input));
/* Ensure the resolved path is within our basedir */
- for (auto p = resolved_path; !p.empty(); p = p.parent_path()) {
- if (std::filesystem::equivalent(p, abs_base)) {
- return resolved_path;
- }
- if (p == p.parent_path()) { // reached the root directory
- break;
- }
+ if (resolved_path.string().rfind(abs_base.string(), 0) == 0) {
+ return resolved_path;
}
+
return {};
}
diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc
index 48cc6ff37..15c7148c7 100644
--- a/engine/services/engine_service.cc
+++ b/engine/services/engine_service.cc
@@ -9,6 +9,7 @@
#include "config/model_config.h"
#include "database/engines.h"
#include "database/models.h"
+#include "extensions/local-engine/local_engine.h"
#include "extensions/remote-engine/remote_engine.h"
#include "utils/archive_utils.h"
@@ -16,6 +17,7 @@
#include "utils/engine_matcher_utils.h"
#include "utils/file_manager_utils.h"
#include "utils/github_release_utils.h"
+#include "utils/hardware/os_info.h"
#include "utils/logging_utils.h"
#include "utils/normalize_engine.h"
#include "utils/result.hpp"
@@ -46,13 +48,6 @@ std::string Repo2Engine(const std::string& r) {
}
return r;
};
-
-std::string GetEnginePath(std::string_view e) {
- if (e == kLlamaRepo) {
- return kLlamaLibPath;
- }
- return kLlamaLibPath;
-};
} // namespace
cpp::result EngineService::InstallEngineAsync(
@@ -236,11 +231,14 @@ cpp::result EngineService::DownloadEngine(
auto latest_version_semantic = normalized_version == "latest"
? res.value()[0].version
: normalized_version;
- auto merged_variant_name = engine + "-" + latest_version_semantic + "-" +
- variant_name.value() + ".tar.gz";
+ std::unordered_set<std::string> merged_variant_name = {
+ "llama-" + latest_version_semantic + "-bin-" + variant_name.value() +
+ ".tar.gz", // menlo
+ "llama-" + latest_version_semantic + "-bin-" + variant_name.value() +
+ ".zip"}; // ggml
for (const auto& asset : res.value()) {
- if (asset.name == merged_variant_name) {
+ if (merged_variant_name.find(asset.name) != merged_variant_name.end()) {
selected_variant = asset;
break;
}
@@ -275,43 +273,96 @@ cpp::result EngineService::DownloadEngine(
}
}
- auto normalize_version = "v" + selected_variant->version;
auto variant_folder_name = engine_matcher_utils::GetVariantFromNameAndVersion(
selected_variant->name, engine, selected_variant->version);
auto variant_folder_path = file_manager_utils::GetEnginesContainerPath() /
engine / variant_folder_name.value() /
- normalize_version;
+ selected_variant->version;
auto variant_path = variant_folder_path / selected_variant->name;
std::filesystem::create_directories(variant_folder_path);
CTL_INF("variant_folder_path: " + variant_folder_path.string());
- auto on_finished = [this, engine, selected_variant, variant_folder_path,
- normalize_version](const DownloadTask& finishedTask) {
+ auto on_finished = [this, engine, selected_variant,
+ variant_folder_path](const DownloadTask& finishedTask) {
// try to unzip the downloaded file
CTL_INF("Engine zip path: " << finishedTask.items[0].localPath.string());
- CTL_INF("Version: " + normalize_version);
+ CTL_INF("Version: " + selected_variant->version);
auto extract_path = finishedTask.items[0].localPath.parent_path();
archive_utils::ExtractArchive(finishedTask.items[0].localPath.string(),
extract_path.string(), true);
-
+ CTL_INF("local path: " << finishedTask.items[0].localPath.string()
+ << ", extract path: " << extract_path.string());
auto variant = engine_matcher_utils::GetVariantFromNameAndVersion(
- selected_variant->name, engine, normalize_version);
-
+ selected_variant->name, engine, selected_variant->version);
CTL_INF("Extracted variant: " + variant.value());
- // set as default
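+ // Write a version.txt marker, flatten build/bin into the variant folder,
+ // ensure a deps directory exists, mark llama-server as executable, and copy
+ // the MSVC runtime DLLs next to it on Windows.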
+ try {
+ // Create version file
+ std::ofstream meta(extract_path / "version.txt", std::ios::out);
+ meta << "name: " << variant.value() << std::endl;
+ meta << "version: " << selected_variant->version << std::endl;
+ meta.close();
+
+ std::filesystem::path bin_path = extract_path / "build" / "bin";
+ if (std::filesystem::exists(bin_path)) {
+ for (const auto& entry :
+ std::filesystem::directory_iterator(bin_path)) {
+ if (entry.is_regular_file()) {
+ std::filesystem::path target_file =
+ extract_path / entry.path().filename();
+ std::filesystem::copy_file(
+ entry.path(), target_file,
+ std::filesystem::copy_options::overwrite_existing);
+ }
+ }
+ std::filesystem::remove_all(bin_path.parent_path());
+ }
+ if (!std::filesystem::exists(extract_path.parent_path().parent_path() /
+ "deps")) {
+ std::filesystem::create_directory(
+ extract_path.parent_path().parent_path() / "deps");
+ }
+ std::filesystem::permissions(extract_path / kLlamaServer,
+ std::filesystem::perms::owner_exec |
+ std::filesystem::perms::group_exec |
+ std::filesystem::perms::others_exec,
+ std::filesystem::perm_options::add);
+
+ const std::vector<std::string> windows_deps = {
+ "msvcp140.dll", "vcruntime140.dll", "vcruntime140_1.dll"};
+ for (auto const& win_dep : windows_deps) {
+ if (std::filesystem::exists(
+ file_manager_utils::GetExecutableFolderContainerPath() /
+ win_dep)) {
+ CTL_INF("Copy file "
+ << (file_manager_utils::GetExecutableFolderContainerPath() /
+ win_dep)
+ .string()
+ << " to " << extract_path.string());
+ std::filesystem::copy_file(
+ file_manager_utils::GetExecutableFolderContainerPath() / win_dep,
+ extract_path / win_dep,
+ std::filesystem::copy_options::overwrite_existing);
+ }
+ }
+
+ } catch (const std::exception& e) {
+ CTL_INF(e.what());
+ }
- auto res =
- SetDefaultEngineVariant(engine, normalize_version, variant.value());
+ // set as default
+ auto res = SetDefaultEngineVariant(engine, selected_variant->version,
+ variant.value());
if (res.has_error()) {
CTL_ERR("Failed to set default engine variant: " << res.error());
} else {
CTL_INF("Set default engine variant: " << res.value().variant);
}
- auto create_res = EngineService::UpsertEngine(
- engine, // engine_name
- kLocal, "", "", normalize_version, variant.value(), "Default", "");
+ auto create_res =
+ EngineService::UpsertEngine(engine, // engine_name
+ kLocal, "", "", selected_variant->version,
+ variant.value(), "Default", "");
if (create_res.has_error()) {
CTL_ERR("Failed to create engine entry: " << create_res->engine_name);
@@ -322,7 +373,7 @@ cpp::result EngineService::DownloadEngine(
for (const auto& entry : std::filesystem::directory_iterator(
variant_folder_path.parent_path())) {
if (entry.is_directory() &&
- entry.path().filename() != normalize_version) {
+ entry.path().filename() != selected_variant->version) {
try {
std::filesystem::remove_all(entry.path());
} catch (const std::exception& e) {
@@ -450,7 +501,26 @@ std::string EngineService::GetMatchedVariant(
cpp::result<std::vector<EngineService::EngineRelease>, std::string>
EngineService::GetEngineReleases(const std::string& engine) const {
auto ne = cortex::engine::NormalizeEngine(engine);
- return github_release_utils::GetReleases("menloresearch", ne);
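+ // Merge releases from both the menlo and ggml-org repositories, de-duplicated
+ // and ordered by release name in descending order.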
+ auto ggml_org = github_release_utils::GetReleases(kGgmlOrg, ne);
+ auto menlo = github_release_utils::GetReleases(kMenloOrg, ne);
+ if (ggml_org.has_error() && menlo.has_error()) {
+ return cpp::fail(ggml_org.error());
+ }
+ auto comparator = [](const EngineService::EngineRelease& e1,
+ const EngineService::EngineRelease& e2) {
+ return e1.name > e2.name;
+ };
+ std::set<EngineService::EngineRelease, decltype(comparator)> s(comparator);
+ if (ggml_org.has_value()) {
+ s.insert(ggml_org.value().begin(), ggml_org.value().end());
+ }
+
+ if (menlo.has_value()) {
+ s.insert(menlo.value().begin(), menlo.value().end());
+ }
+ std::vector<EngineService::EngineRelease> res;
+ std::copy(s.begin(), s.end(), std::back_inserter(res));
+ return res;
}
cpp::result<std::vector<EngineVariantResponse>, std::string>
@@ -458,16 +528,85 @@ EngineService::GetEngineVariants(const std::string& engine,
const std::string& version,
bool filter_compatible_only) const {
auto ne = cortex::engine::NormalizeEngine(engine);
- auto engine_release =
- github_release_utils::GetReleaseByVersion("menloresearch", ne, version);
+ auto engine_release_menlo =
+ github_release_utils::GetReleaseByVersion(kMenloOrg, ne, version);
+ auto engine_release_ggml =
+ github_release_utils::GetReleaseByVersion(kGgmlOrg, ne, version);
+
+ if (engine_release_menlo.has_error() && engine_release_ggml.has_error()) {
+ return cpp::fail("Failed to get engine release: " +
+ engine_release_menlo.error());
+ }
+ if (engine_release_menlo.has_error()) {
+ CTL_WRN("Failed to get engine release: " << engine_release_menlo.error());
+ }
- if (engine_release.has_error()) {
- return cpp::fail("Failed to get engine release: " + engine_release.error());
+ if (engine_release_ggml.has_error()) {
+ CTL_WRN("Failed to get engine release: " << engine_release_ggml.error());
}
  std::vector<EngineVariant> compatible_variants;
- for (const auto& variant : engine_release.value().assets) {
- if (variant.content_type != "application/gzip") {
+  std::vector<github_release_utils::GitHubAsset> assets;
+
+ auto get_os_major = []() -> int {
+ auto os_info = cortex::hw::GetOSInfo();
+ // Get os major version
+ size_t dot_pos = os_info.version.find_first_of(".");
+ if (dot_pos != std::string::npos) {
+ try {
+ return std::stoi(os_info.version.substr(0, dot_pos));
+ } catch (const std::exception& e) {
+ return 0;
+ }
+ } else {
+ // No version found
+ return 0;
+ }
+ };
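
For reference, the parse above takes the digits before the first dot and falls back to 0; a small self-contained sketch with made-up version strings:

#include <iostream>
#include <string>

// Same idea as the get_os_major lambda: digits before the first '.', else 0.
int MajorOf(const std::string& version) {
  size_t dot_pos = version.find_first_of('.');
  if (dot_pos == std::string::npos) {
    return 0;  // no dot, version unknown
  }
  try {
    return std::stoi(version.substr(0, dot_pos));
  } catch (const std::exception&) {
    return 0;
  }
}

int main() {
  std::cout << MajorOf("12.7.6") << "\n";   // 12 -> menloresearch build is preferred
  std::cout << MajorOf("14.3") << "\n";     // 14 -> ggml-org build is preferred
  std::cout << MajorOf("unknown") << "\n";  // 0
  return 0;
}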
+
+ if (engine_release_menlo.has_value()) {
+    // On macOS 12 or older, take the macOS binary from the menloresearch release
+ std::copy_if(
+ engine_release_menlo.value().assets.begin(),
+ engine_release_menlo.value().assets.end(), std::back_inserter(assets),
+ [get_os_major](const github_release_utils::GitHubAsset& assets) {
+#if defined(__APPLE__) && defined(__MACH__)
+ if ((assets.name.find(kMacOs) == std::string::npos) ||
+ (get_os_major() <= 12 &&
+ assets.name.find(kMacOs) != std::string::npos)) {
+ return true;
+ }
+ return false;
+#else
+ return true;
+#endif
+ });
+ }
+
+ if (engine_release_ggml.has_value()) {
+    // On macOS 13 or newer, take the macOS binary from the ggml-org release
+ std::copy_if(
+ engine_release_ggml.value().assets.begin(),
+ engine_release_ggml.value().assets.end(), std::back_inserter(assets),
+ [get_os_major](const github_release_utils::GitHubAsset& assets) {
+#if defined(__APPLE__) && defined(__MACH__)
+ if ((assets.name.find(kMacOs) == std::string::npos) ||
+ (get_os_major() > 12 &&
+ assets.name.find(kMacOs) != std::string::npos)) {
+ return true;
+ }
+ return false;
+#else
+ return true;
+#endif
+ });
+ }
+
+ for (const auto& variant : assets) {
+ CTL_INF("content_type: " << variant.content_type
+ << ", name: " << variant.name);
+ if (variant.content_type != "application/gzip" &&
+ variant.content_type != "application/json; charset=utf-8") {
continue;
}
if (variant.state != "uploaded") {
@@ -494,30 +633,29 @@ EngineService::GetEngineVariants(const std::string& engine,
name.find("mac") != std::string::npos)
os_match = true;
if (system_info->os == "windows" &&
- name.find("windows") != std::string::npos)
+ name.find("win") != std::string::npos)
os_match = true;
if (system_info->os == "linux" &&
- name.find("linux") != std::string::npos)
+ (name.find("linux") != std::string::npos ||
+ name.find("ubuntu") != std::string::npos))
os_match = true;
bool arch_match = false;
if (system_info->arch == "arm64" &&
name.find("arm64") != std::string::npos)
arch_match = true;
- if (system_info->arch == "amd64" &&
- name.find("amd64") != std::string::npos)
+ if (system_info->arch == "x64" &&
+ name.find("x64") != std::string::npos)
arch_match = true;
return !(os_match && arch_match);
}),
compatible_variants.end());
-
if (compatible_variants.empty()) {
return cpp::fail("No compatible variants found for system " +
system_info->os + "/" + system_info->arch);
}
}
-
return compatible_variants;
}
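
Ignoring the content-type and state checks, the filter above keeps assets whose file names contain the host OS and architecture substrings. A hedged sketch of the same checks, hard-coded to a windows/x64 host and using asset names in the new ggml-org naming scheme:

#include <iostream>
#include <string>
#include <vector>

// Same substring checks as the filter above, specialized to windows/x64.
bool MatchesWinX64(const std::string& name) {
  bool os_match = name.find("win") != std::string::npos;
  bool arch_match = name.find("x64") != std::string::npos;
  return os_match && arch_match;
}

int main() {
  std::vector<std::string> names{
      "llama-b4920-bin-win-avx2-x64.zip",   // kept
      "llama-b4920-bin-ubuntu-x64.tar.gz",  // dropped on Windows
      "llama-b4920-bin-macos-arm64.zip",    // dropped on Windows
  };
  for (const auto& n : names) {
    std::cout << n << " -> " << (MatchesWinX64(n) ? "keep" : "drop") << "\n";
  }
  return 0;
}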
@@ -550,7 +688,7 @@ EngineService::SetDefaultEngineVariant(const std::string& engine,
auto normalized_version = string_utils::RemoveSubstring(version, "v");
auto config = file_manager_utils::GetCortexConfig();
- config.llamacppVersion = "v" + normalized_version;
+ config.llamacppVersion = normalized_version;
config.llamacppVariant = variant;
auto result = file_manager_utils::UpdateCortexConfig(config);
if (result.has_error()) {
@@ -574,10 +712,10 @@ cpp::result EngineService::IsEngineVariantReady(
return cpp::fail(installed_engines.error());
}
- CLI_LOG("IsEngineVariantReady: " << ne << ", " << normalized_version << ", "
+ CTL_INF("IsEngineVariantReady: " << ne << ", " << normalized_version << ", "
<< variant);
for (const auto& installed_engine : installed_engines.value()) {
- CLI_LOG("Installed: name: " + installed_engine.name +
+ CTL_INF("Installed: name: " + installed_engine.name +
", version: " + installed_engine.version);
if ((installed_engine.name == variant &&
installed_engine.version == normalized_version) ||
@@ -634,16 +772,22 @@ EngineService::GetInstalledEngineVariants(const std::string& engine) const {
// try to find version.txt
auto version_txt_path = version_entry.path() / "version.txt";
if (!std::filesystem::exists(version_txt_path)) {
- continue;
+        // create a new version.txt from the folder names
+ std::ofstream meta(version_txt_path, std::ios::out);
+ meta << "name: " << entry.path().filename() << std::endl;
+ meta << "version: " << version_entry.path().filename() << std::endl;
+ meta.close();
+ CTL_INF("name: " << entry.path().filename().string() << ", version: "
+ << version_entry.path().filename().string());
}
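
The file written above is read back with YAML::LoadFile just below, so all that is assumed on disk is two scalar keys. A minimal yaml-cpp round-trip sketch; the variant and version values here are example folder names, not fixed strings:

#include <fstream>
#include <iostream>
#include <string>

#include "yaml-cpp/yaml.h"

int main() {
  // Example values only; in the patch these come from the variant and version folder names.
  const std::string variant = "win-avx2-x64";
  const std::string version = "b4920";
  {
    std::ofstream meta("version.txt", std::ios::out);
    meta << "name: " << variant << std::endl;
    meta << "version: " << version << std::endl;
  }
  auto node = YAML::LoadFile("version.txt");
  std::cout << node["name"].as<std::string>() << " / "
            << node["version"].as<std::string>() << "\n";
  return 0;
}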
try {
auto node = YAML::LoadFile(version_txt_path.string());
auto ev = EngineVariantResponse{
- node["name"].as(), // name
- "v" + node["version"].as(), // version
- engine, // engine
- "", // type
+ node["name"].as(), // name
+ node["version"].as(), // version
+ engine, // engine
+ "", // type
};
variants.push_back(ev);
} catch (const YAML::Exception& e) {
@@ -696,76 +840,18 @@ cpp::result EngineService::LoadEngine(
}
return {};
}
-
- // End hard code
-
- CTL_INF("Loading engine: " << ne);
+ if (engines_.find(ne) == engines_.end()) {
+ CTL_INF("Loading local engine: " << engine_name);
#if defined(_WIN32) || defined(_WIN64) || defined(__linux__)
- CTL_INF("CPU Info: " << cortex::cpuid::CpuInfo().to_string());
+ CTL_INF("CPU Info: " << cortex::cpuid::CpuInfo().to_string());
#endif
-
- auto engine_dir_path_res = GetEngineDirPath(ne);
- if (engine_dir_path_res.has_error()) {
- return cpp::fail(engine_dir_path_res.error());
+ engines_[ne].engine = new cortex::local::LocalEngine(*this, *(q_.get()));
+ CTL_INF("Loaded engine: " << engine_name);
+ } else {
+ CTL_INF("Engine has already been loaded: " << engine_name);
}
- auto engine_dir_path = engine_dir_path_res.value().first;
- auto custom_engine_path = engine_dir_path_res.value().second;
-
- try {
- auto cuda_path = file_manager_utils::GetCudaToolkitPath(ne);
-
-#if defined(_WIN32) || defined(_WIN64)
- // register deps
- if (!(getenv("ENGINE_PATH"))) {
-      std::vector<std::filesystem::path> paths{};
- paths.push_back(cuda_path);
- paths.push_back(engine_dir_path);
- CTL_DBG("Registering dylib for "
- << ne << " with " << std::to_string(paths.size()) << " paths.");
- for (const auto& path : paths) {
- CTL_DBG("Registering path: " << path.string());
- }
-
- auto reg_result = dylib_path_manager_->RegisterPath(ne, paths);
- if (reg_result.has_error()) {
- CTL_DBG("Failed register lib paths for: " << ne);
- } else {
- CTL_DBG("Registered lib paths for: " << ne);
- }
- }
-#endif
-
-    auto dylib =
-        std::make_unique<cortex_cpp::dylib>(engine_dir_path.string(), "engine");
-
- auto config = file_manager_utils::GetCortexConfig();
- auto log_path = std::filesystem::path(config.logFolderPath) /
- std::filesystem::path(config.logLlamaCppPath);
-
- // init
-    auto func = dylib->get_function<EngineI*()>("get_engine");
- auto engine_obj = func();
- auto load_opts = EngineI::EngineLoadOption{
- /* .engine_path = */ engine_dir_path,
- /* .deps_path = */ cuda_path,
- /* .is_custom_engine_path = */ custom_engine_path,
- /* .log_path = */ log_path,
- /* .max_log_lines = */ config.maxLogLines,
- /* .log_level = */ logging_utils_helper::global_log_level,
- };
- engine_obj->Load(load_opts);
-
- engines_[ne].engine = engine_obj;
- engines_[ne].dl = std::move(dylib);
-
- CTL_DBG("Engine loaded: " << ne);
- return {};
- } catch (const cortex_cpp::dylib::load_error& e) {
- CTL_ERR("Could not load engine: " << e.what());
- engines_.erase(ne);
- return cpp::fail("Could not load engine " + ne + ": " + e.what());
- }
+ return {};
}
void EngineService::RegisterEngineLibPath() {
@@ -785,7 +871,9 @@ void EngineService::RegisterEngineLibPath() {
// register deps
    std::vector<std::filesystem::path> paths{};
- paths.push_back(cuda_path);
+ if (std::filesystem::exists(cuda_path)) {
+ paths.push_back(cuda_path);
+ }
paths.push_back(engine_dir_path);
CTL_DBG("Registering dylib for "
@@ -796,7 +884,8 @@ void EngineService::RegisterEngineLibPath() {
auto reg_result = dylib_path_manager_->RegisterPath(ne, paths);
if (reg_result.has_error()) {
- CTL_WRN("Failed register lib path for " << engine);
+ CTL_WRN("Failed register lib path for "
+ << engine << ", error: " << reg_result.error());
} else {
CTL_DBG("Registered lib path for " << engine);
}
@@ -829,8 +918,8 @@ EngineService::GetEngineDirPath(const std::string& engine_name) {
CTL_DBG("user defined engine path: " << user_defined_engine_path);
const std::filesystem::path engine_dir_path = [&] {
if (user_defined_engine_path != nullptr) {
- return std::filesystem::path(user_defined_engine_path) /
- GetEnginePath(ne) / selected_engine_variant->variant /
+ return std::filesystem::path(user_defined_engine_path) / kLlamaLibPath /
+ selected_engine_variant->variant /
selected_engine_variant->version;
} else {
return file_manager_utils::GetEnginesContainerPath() / ne /
@@ -891,8 +980,7 @@ std::vector EngineService::GetLoadedEngines() {
cpp::result<github_release_utils::GitHubRelease, std::string>
EngineService::GetLatestEngineVersion(const std::string& engine) const {
auto ne = cortex::engine::NormalizeEngine(engine);
- auto res =
- github_release_utils::GetReleaseByVersion("menloresearch", ne, "latest");
+ auto res = github_release_utils::GetReleaseByVersion(kMenloOrg, ne, "latest");
if (res.has_error()) {
return cpp::fail("Failed to fetch engine " + engine + " latest version!");
}
diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h
index 7e6be74c5..0be1fff64 100644
--- a/engine/services/engine_service.h
+++ b/engine/services/engine_service.h
@@ -19,6 +19,7 @@
#include "utils/github_release_utils.h"
#include "utils/result.hpp"
#include "utils/system_info_utils.h"
+#include "utils/task_queue.h"
struct EngineUpdateResult {
std::string engine;
@@ -44,7 +45,6 @@ class EngineService : public EngineServiceI {
using EngineVariant = github_release_utils::GitHubAsset;
struct EngineInfo {
-    std::unique_ptr<cortex_cpp::dylib> dl;
EngineV engine;
};
@@ -60,12 +60,13 @@ class EngineService : public EngineServiceI {
};
HardwareInfo hw_inf_;
  std::shared_ptr<DatabaseService> db_service_ = nullptr;
+  std::shared_ptr<cortex::TaskQueue> q_ = nullptr;
public:
-  explicit EngineService(
-      std::shared_ptr<DownloadService> download_service,
-      std::shared_ptr<cortex::DylibPathManager> dylib_path_manager,
-      std::shared_ptr<DatabaseService> db_service)
+  EngineService(std::shared_ptr<DownloadService> download_service,
+                std::shared_ptr<cortex::DylibPathManager> dylib_path_manager,
+                std::shared_ptr<DatabaseService> db_service,
+                std::shared_ptr<cortex::TaskQueue> q)
: download_service_{download_service},
dylib_path_manager_{dylib_path_manager},
hw_inf_{
@@ -74,9 +75,17 @@ class EngineService : public EngineServiceI {
system_info_utils::GetDriverAndCudaVersion()
.second // cuda_driver_version.
},
+ db_service_(db_service),
+ q_(q) {}
- db_service_(db_service) {}
-
+  EngineService(std::shared_ptr<cortex::DylibPathManager> dylib_path_manager)
+ : dylib_path_manager_(dylib_path_manager),
+ hw_inf_{
+ system_info_utils::GetSystemInfo(), // sys_inf.
+ {}, // cpu_info.
+ system_info_utils::GetDriverAndCudaVersion()
+ .second // cuda_driver_version.
+ } {}
  std::vector<EngineInfo> GetEngineInfoList() const;
/**
@@ -159,6 +168,9 @@ class EngineService : public EngineServiceI {
bool IsRemoteEngine(const std::string& engine_name) const override;
+  cpp::result<std::pair<std::filesystem::path, bool>, std::string>
+ GetEngineDirPath(const std::string& engine_name);
+
private:
bool IsEngineLoaded(const std::string& engine);
@@ -172,9 +184,6 @@ class EngineService : public EngineServiceI {
std::string GetMatchedVariant(const std::string& engine,
                                const std::vector<std::string>& variants);
-  cpp::result<std::pair<std::filesystem::path, bool>, std::string>
- GetEngineDirPath(const std::string& engine_name);
-
  cpp::result<bool, std::string> IsEngineVariantReady(
const std::string& engine, const std::string& version,
const std::string& variant);
diff --git a/engine/services/hardware_service.cc b/engine/services/hardware_service.cc
index f0ccadb28..fb2f841be 100644
--- a/engine/services/hardware_service.cc
+++ b/engine/services/hardware_service.cc
@@ -203,10 +203,8 @@ bool HardwareService::Restart(const std::string& host, int port) {
#else
    std::vector<std::string> commands;
// Some engines requires to add lib search path before process being created
-    auto download_srv = std::make_shared<DownloadService>();
-    auto dylib_path_mng = std::make_shared<cortex::DylibPathManager>();
-    auto db_srv = std::make_shared<DatabaseService>();
- EngineService(download_srv, dylib_path_mng, db_srv).RegisterEngineLibPath();
+      EngineService(std::make_shared<cortex::DylibPathManager>())
+ .RegisterEngineLibPath();
std::string p = cortex_utils::GetCurrentPath() / exe;
commands.push_back(p);
commands.push_back("--ignore_cout");
diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc
index a1646495b..e07ed71ba 100644
--- a/engine/services/inference_service.cc
+++ b/engine/services/inference_service.cc
@@ -12,7 +12,7 @@ cpp::result InferenceService::HandleChatCompletion(
} else {
engine_type = (*(json_body)).get("engine", kLlamaRepo).asString();
}
- function_calling_utils::PreprocessRequest(json_body);
+ CTL_DBG("engine_type: " << engine_type);
auto tool_choice = json_body->get("tool_choice", Json::Value::null);
auto model_id = json_body->get("model", "").asString();
if (saved_models_.find(model_id) != saved_models_.end()) {
@@ -32,6 +32,7 @@ cpp::result InferenceService::HandleChatCompletion(
}
}
}
+ CTL_DBG("engine_type: " << engine_type);
auto engine_result = engine_service_->GetLoadedEngine(engine_type);
if (engine_result.has_error()) {
@@ -43,51 +44,6 @@ cpp::result InferenceService::HandleChatCompletion(
return cpp::fail(std::make_pair(stt, res));
}
- if (!model_id.empty()) {
- if (auto model_service = model_service_.lock()) {
- auto metadata_ptr = model_service->GetCachedModelMetadata(model_id);
- if (metadata_ptr != nullptr &&
- !metadata_ptr->tokenizer->chat_template.empty()) {
- auto tokenizer = metadata_ptr->tokenizer;
- auto messages = (*json_body)["messages"];
- Json::Value messages_jsoncpp(Json::arrayValue);
- for (auto message : messages) {
- messages_jsoncpp.append(message);
- }
-
- Json::Value tools(Json::arrayValue);
- Json::Value template_data_json;
- template_data_json["messages"] = messages_jsoncpp;
- // template_data_json["tools"] = tools;
-
- auto prompt_result = jinja::RenderTemplate(
- tokenizer->chat_template, template_data_json, tokenizer->bos_token,
- tokenizer->eos_token, tokenizer->add_bos_token,
- tokenizer->add_eos_token, tokenizer->add_generation_prompt);
- if (prompt_result.has_value()) {
- (*json_body)["prompt"] = prompt_result.value();
- if (json_body->isMember("stop")) {
- bool need_append = true;
- for (auto& s : (*json_body)["stop"]) {
- if (s.asString() == tokenizer->eos_token) {
- need_append = false;
- }
- }
- if (need_append) {
- (*json_body)["stop"].append(tokenizer->eos_token);
- }
- } else {
- Json::Value stops(Json::arrayValue);
- stops.append(tokenizer->eos_token);
- (*json_body)["stop"] = stops;
- }
- } else {
- CTL_ERR("Failed to render prompt: " + prompt_result.error());
- }
- }
- }
- }
-
CTL_DBG("Json body inference: " + json_body->toStyledString());
auto cb = [q, tool_choice](Json::Value status, Json::Value res) {
@@ -275,9 +231,7 @@ InferResult InferenceService::GetModels(
for (const auto& loaded_engine : loaded_engines) {
    if (std::holds_alternative<EngineI*>(loaded_engine)) {
      auto e = std::get<EngineI*>(loaded_engine);
- if (e->IsSupported("GetModels")) {
- e->GetModels(json_body, std::move(cb));
- }
+ e->GetModels(json_body, std::move(cb));
} else {
      std::get<RemoteEngineI*>(loaded_engine)
->GetModels(json_body, std::move(cb));
@@ -302,10 +256,8 @@ bool InferenceService::StopInferencing(const std::string& engine_name,
  if (std::holds_alternative<EngineI*>(engine_result.value())) {
    auto engine = std::get<EngineI*>(engine_result.value());
- if (engine->IsSupported("StopInferencing")) {
- engine->StopInferencing(model_id);
- CTL_INF("Stopped inferencing");
- }
+ engine->StopInferencing(model_id);
+ CTL_INF("Stopped inferencing");
}
return true;
}
diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index d9359b698..a3771e0a1 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -165,8 +165,8 @@ ModelService::ModelService(std::shared_ptr db_service,
download_service_{download_service},
inference_svc_(inference_service),
engine_svc_(engine_svc),
- task_queue_(task_queue) {
- // ProcessBgrTasks();
+ task_queue_(task_queue){
+ // ProcessBgrTasks();
};
void ModelService::ForceIndexingModelList() {
@@ -500,13 +500,10 @@ cpp::result ModelService::DeleteModel(
std::filesystem::remove(yaml_fp);
CTL_INF("Removed: " << yaml_fp.string());
} else {
- // Remove yaml files
- for (const auto& entry :
- std::filesystem::directory_iterator(yaml_fp.parent_path())) {
- if (entry.is_regular_file() && (entry.path().extension() == ".yml")) {
- std::filesystem::remove(entry);
- CTL_INF("Removed: " << entry.path().string());
- }
+ // Is a local model - Remove only this model's yaml file
+ if (std::filesystem::exists(yaml_fp)) {
+ std::filesystem::remove(yaml_fp);
+ CTL_INF("Removed: " << yaml_fp.string());
}
}
@@ -557,6 +554,8 @@ cpp::result ModelService::StartModel(
if (auto& o = params_override["ctx_len"]; !o.isNull()) {
ctx_len = o.asInt();
}
+ Json::Value model_load_params;
+ json_helper::MergeJson(model_load_params, params_override);
try {
constexpr const int kDefautlContextLength = 8192;
@@ -627,9 +626,14 @@ cpp::result ModelService::StartModel(
#if defined(_WIN32)
json_data["model_path"] = cortex::wc::WstringToUtf8(
fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
+ model_load_params["model_path"] =
+ cortex::wc::WstringToUtf8(
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).wstring());
#else
json_data["model_path"] =
fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
+ model_load_params["model_path"] =
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.files[0])).string();
#endif
} else {
LOG_WARN << "model_path is empty";
@@ -642,6 +646,8 @@ cpp::result ModelService::StartModel(
#else
json_data["mmproj"] =
fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
+        model_load_params["mmproj"] =
+ fmu::ToAbsoluteCortexDataPath(fs::path(mc.mmproj)).string();
#endif
}
json_data["system_prompt"] = mc.system_template;
@@ -655,6 +661,7 @@ cpp::result ModelService::StartModel(
}
json_data["model"] = model_handle;
+ model_load_params["model"] = model_handle;
if (auto& cpt = custom_prompt_template; !cpt.value_or("").empty()) {
auto parse_prompt_result = string_utils::ParsePrompt(cpt.value());
json_data["system_prompt"] = parse_prompt_result.system_prompt;
@@ -662,8 +669,6 @@ cpp::result ModelService::StartModel(
json_data["ai_prompt"] = parse_prompt_result.ai_prompt;
}
- json_helper::MergeJson(json_data, params_override);
-
// Set default cpu_threads if it is not configured
if (!json_data.isMember("cpu_threads")) {
json_data["cpu_threads"] = GetCpuThreads();
@@ -686,26 +691,12 @@ cpp::result ModelService::StartModel(
assert(!!inference_svc_);
-  auto ir =
-      inference_svc_->LoadModel(std::make_shared<Json::Value>(json_data));
+  auto ir = inference_svc_->LoadModel(
+      std::make_shared<Json::Value>(model_load_params));
auto status = std::get<0>(ir)["status_code"].asInt();
auto data = std::get<1>(ir);
if (status == drogon::k200OK) {
- // start model successfully, in case not vision model, we store the metadata so we can use
- // for each inference
- if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) {
- auto metadata_res = GetModelMetadata(model_handle);
- if (metadata_res.has_value()) {
- loaded_model_metadata_map_.emplace(model_handle,
- std::move(metadata_res.value()));
- CTL_INF("Successfully stored metadata for model " << model_handle);
- } else {
- CTL_WRN("Failed to get metadata for model " << model_handle << ": "
- << metadata_res.error());
- }
- }
-
return StartModelResult{/* .success = */ true,
/* .warning = */ may_fallback_res.value()};
} else if (status == drogon::k409Conflict) {
@@ -760,8 +751,6 @@ cpp::result ModelService::StopModel(
if (bypass_check) {
bypass_stop_check_set_.erase(model_handle);
}
- loaded_model_metadata_map_.erase(model_handle);
- CTL_INF("Removed metadata for model " << model_handle);
return true;
} else {
CTL_ERR("Model failed to stop with status code: " << status);
@@ -1047,13 +1036,15 @@ ModelService::MayFallbackToCpu(const std::string& model_path, int ngl,
auto es = hardware::EstimateLLaMACppRun(model_path, rc);
if (!!es && (*es).gpu_mode.vram_MiB > free_vram_MiB && is_cuda) {
- CTL_WRN("Not enough VRAM - " << "required: " << (*es).gpu_mode.vram_MiB
- << ", available: " << free_vram_MiB);
+ CTL_WRN("Not enough VRAM - "
+ << "required: " << (*es).gpu_mode.vram_MiB
+ << ", available: " << free_vram_MiB);
}
if (!!es && (*es).cpu_mode.ram_MiB > free_ram_MiB) {
- CTL_WRN("Not enough RAM - " << "required: " << (*es).cpu_mode.ram_MiB
- << ", available: " << free_ram_MiB);
+ CTL_WRN("Not enough RAM - "
+ << "required: " << (*es).cpu_mode.ram_MiB
+ << ", available: " << free_ram_MiB);
}
return warning;
@@ -1090,14 +1081,6 @@ ModelService::GetModelMetadata(const std::string& model_id) const {
return std::move(*model_metadata_res);
}
-std::shared_ptr<ModelMetadata> ModelService::GetCachedModelMetadata(
- const std::string& model_id) const {
- if (loaded_model_metadata_map_.find(model_id) ==
- loaded_model_metadata_map_.end())
- return nullptr;
- return loaded_model_metadata_map_.at(model_id);
-}
-
std::string ModelService::GetEngineByModelId(
const std::string& model_id) const {
namespace fs = std::filesystem;
diff --git a/engine/services/model_service.h b/engine/services/model_service.h
index beba91f8c..fa247b954 100644
--- a/engine/services/model_service.h
+++ b/engine/services/model_service.h
@@ -83,9 +83,6 @@ class ModelService {
  cpp::result<std::shared_ptr<ModelMetadata>, std::string> GetModelMetadata(
const std::string& model_id) const;
-  std::shared_ptr<ModelMetadata> GetCachedModelMetadata(
- const std::string& model_id) const;
-
std::string GetEngineByModelId(const std::string& model_id) const;
private:
@@ -104,12 +101,6 @@ class ModelService {
  std::unordered_set<std::string> bypass_stop_check_set_;
  std::shared_ptr<EngineService> engine_svc_ = nullptr;
- /**
- * Store the chat template of loaded model.
- */
-  std::unordered_map<std::string, std::shared_ptr<ModelMetadata>>
- loaded_model_metadata_map_;
-
std::mutex es_mtx_;
std::unordered_map> es_;
cortex::TaskQueue& task_queue_;
diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc
index b5979667c..661b9b580 100644
--- a/engine/services/model_source_service.cc
+++ b/engine/services/model_source_service.cc
@@ -433,8 +433,7 @@ cpp::result ModelSourceService::AddCortexsoRepo(
auto author = hub_author;
auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
- if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
- model_author.has_value() && !model_author.value().empty()) {
+ if (model_author.has_value() && !model_author.value().empty()) {
author = model_author.value();
}
diff --git a/engine/test/components/test_engine_matcher_utils.cc b/engine/test/components/test_engine_matcher_utils.cc
index 1d1ed47a8..2c24a9b6f 100644
--- a/engine/test/components/test_engine_matcher_utils.cc
+++ b/engine/test/components/test_engine_matcher_utils.cc
@@ -6,125 +6,78 @@
class EngineMatcherUtilsTestSuite : public ::testing::Test {
protected:
  const std::vector<std::string> cortex_llamacpp_variants{
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx2.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx512.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz",
- "cortex.llamacpp-0.1.43-linux-arm64.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx512.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-11-7.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx-cuda-12-0.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-noavx.tar.gz",
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-vulkan.tar.gz",
+ "llama-b4920-bin-ubuntu-arm64.zip",
+ "llama-b4920-bin-linux-avx-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-linux-avx-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-linux-avx-x64.tar.gz",
+ "llama-b4920-bin-linux-avx2-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-linux-avx2-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-ubuntu-x64.tar.gz",
+ "llama-b4920-bin-linux-avx512-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-linux-avx512-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-linux-avx512-x64.tar.gz",
+ "llama-b4920-bin-linux-noavx-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-linux-noavx-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-linux-noavx-x64.tar.gz",
+ "llama-b4920-bin-ubuntu-vulkan-x64.tar.gz",
+ "llama-b4920-bin-macos-arm64.zip",
+ "llama-b4920-bin-macos-x64.zip",
+ "llama-b4920-bin-win-avx-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-win-avx-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-win-avx-x64.zip",
+ "llama-b4920-bin-win-avx2-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-win-avx2-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-win-avx2-x64.zip",
+ "llama-b4920-bin-win-avx512-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-win-avx512-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-win-avx512-x64.zip",
+ "llama-b4920-bin-win-noavx-cuda-cu11.7-x64.tar.gz",
+ "llama-b4920-bin-win-noavx-cuda-cu12.0-x64.tar.gz",
+ "llama-b4920-bin-win-noavx-x64.zip",
+ "llama-b4920-bin-win-vulkan-x64.zip",
};
-
-  const std::vector<std::string> cortex_tensorrt_variants{
- "cortex.tensorrt-llm-0.0.9-linux-cuda-12-4.tar.gz",
- "cortex.tensorrt-llm-0.0.9-windows-cuda-12-4.tar.gz"};
-
-  const std::vector<std::string> cortex_onnx_variants{
- "cortex.onnx-0.1.7-windows-amd64.tar.gz"};
};
-TEST_F(EngineMatcherUtilsTestSuite, TestValidateOnnx) {
-
- {
- auto expect_matched_variant = cortex_onnx_variants[0];
- auto result = engine_matcher_utils::ValidateOnnx(cortex_onnx_variants,
- "windows", "amd64");
-
- EXPECT_EQ(result, expect_matched_variant);
- }
-
- {
- // should return an empty variant because no variant matched
- auto expect_matched_variant{""};
- auto windows_arm_result = engine_matcher_utils::ValidateOnnx(
- cortex_onnx_variants, "windows", "arm");
- auto mac_arm64_result = engine_matcher_utils::ValidateOnnx(
- cortex_onnx_variants, "mac", "arm64");
-
- EXPECT_EQ(windows_arm_result, expect_matched_variant);
- EXPECT_EQ(mac_arm64_result, expect_matched_variant);
- }
-}
-
-TEST_F(EngineMatcherUtilsTestSuite, TestValidateTensorrt) {
-
+TEST_F(EngineMatcherUtilsTestSuite, TestValidate) {
{
- auto windows_expect_matched_variant{cortex_tensorrt_variants[1]};
- auto linux_expect_matched_variant{cortex_tensorrt_variants[0]};
- auto windows{"windows"};
- auto linux{"linux"};
+ auto os{"win"};
+ auto cpu_arch{"x64"};
+ auto suitable_avx{"avx2"};
auto cuda_version{"12.4"};
- auto windows_result = engine_matcher_utils::ValidateTensorrtLlm(
- cortex_tensorrt_variants, windows, cuda_version);
- auto linux_result = engine_matcher_utils::ValidateTensorrtLlm(
- cortex_tensorrt_variants, linux, cuda_version);
- EXPECT_EQ(windows_result, windows_expect_matched_variant);
- EXPECT_EQ(linux_result, linux_expect_matched_variant);
- }
-
- { // macos is not supported
- auto os = "mac";
- auto cuda_version{"12.4"};
+ auto variant = engine_matcher_utils::Validate(
+ cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version);
- auto result = engine_matcher_utils::ValidateTensorrtLlm(
- cortex_tensorrt_variants, os, cuda_version);
- EXPECT_EQ(result, "");
+ EXPECT_EQ(variant, "llama-b4920-bin-win-avx2-cuda-cu12.0-x64.tar.gz");
}
-}
-TEST_F(EngineMatcherUtilsTestSuite, TestValidate) {
{
- auto os{"windows"};
- auto cpu_arch{"amd64"};
- auto suitable_avx{"avx2"};
- auto cuda_version{"12.4"};
+ auto os{"mac"};
+ auto cpu_arch{"x64"};
+ auto suitable_avx{""};
+ auto cuda_version{""};
auto variant = engine_matcher_utils::Validate(
cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version);
- EXPECT_EQ(
- variant,
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2-cuda-12-0.tar.gz");
+ EXPECT_EQ(variant, "llama-b4920-bin-macos-x64.zip");
}
{
auto os{"mac"};
- auto cpu_arch{"amd64"};
+ auto cpu_arch{"arm64"};
auto suitable_avx{""};
auto cuda_version{""};
auto variant = engine_matcher_utils::Validate(
cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version);
- EXPECT_EQ(variant, "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz");
+ EXPECT_EQ(variant, "llama-b4920-bin-macos-arm64.zip");
}
{
- auto os{"windows"};
- auto cpu_arch{"amd64"};
+ auto os{"win"};
+ auto cpu_arch{"x64"};
auto suitable_avx{"avx2"};
auto cuda_version{"10"};
@@ -132,8 +85,7 @@ TEST_F(EngineMatcherUtilsTestSuite, TestValidate) {
cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version);
// fallback to no cuda version
- EXPECT_EQ(variant,
- "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz");
+ EXPECT_EQ(variant, "llama-b4920-bin-win-avx2-x64.zip");
}
{
@@ -145,30 +97,43 @@ TEST_F(EngineMatcherUtilsTestSuite, TestValidate) {
auto variant = engine_matcher_utils::Validate(
cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version);
- EXPECT_EQ(variant, "cortex.llamacpp-0.1.43-linux-arm64.tar.gz");
+ EXPECT_EQ(variant, "llama-b4920-bin-ubuntu-arm64.zip");
}
}
TEST_F(EngineMatcherUtilsTestSuite, TestGetVersionAndArch) {
{
- std::string variant =
- "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-avx-cuda-11-7.tar.gz";
+ std::string variant = "llama-b4920-bin-linux-avx-cuda-cu11.7-x64.tar.gz";
+ auto [version, arch] = engine_matcher_utils::GetVersionAndArch(variant);
+ EXPECT_EQ(version, "b4920");
+ EXPECT_EQ(arch, "linux-avx-cuda-cu11.7-x64");
+ }
+
+ {
+ std::string variant = "llama-b4920-bin-ubuntu-arm64.zip";
+ auto [version, arch] = engine_matcher_utils::GetVersionAndArch(variant);
+ EXPECT_EQ(version, "b4920");
+ EXPECT_EQ(arch, "ubuntu-arm64");
+ }
+
+ {
+ std::string variant = "llama-b4920-bin-win-avx2-x64.zip";
auto [version, arch] = engine_matcher_utils::GetVersionAndArch(variant);
- EXPECT_EQ(version, "v0.1.25-25.08.24");
- EXPECT_EQ(arch, "linux-amd64-avx-cuda-11-7");
+ EXPECT_EQ(version, "b4920");
+ EXPECT_EQ(arch, "win-avx2-x64");
}
{
- std::string variant = "cortex.llamacpp-0.1.25-windows-amd64-avx2.tar.gz";
+ std::string variant = "llama-b4920-bin-macos-x64.tar.gz";
auto [version, arch] = engine_matcher_utils::GetVersionAndArch(variant);
- EXPECT_EQ(version, "v0.1.25");
- EXPECT_EQ(arch, "windows-amd64-avx2");
+ EXPECT_EQ(version, "b4920");
+ EXPECT_EQ(arch, "macos-x64");
}
{
- std::string variant = "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz";
+ std::string variant = "llama-b4920-bin-ubuntu-vulkan-x64.zip";
auto [version, arch] = engine_matcher_utils::GetVersionAndArch(variant);
- EXPECT_EQ(version, "v0.1.25-25.08.24");
- EXPECT_EQ(arch, "mac-amd64");
+ EXPECT_EQ(version, "b4920");
+ EXPECT_EQ(arch, "ubuntu-vulkan-x64");
}
}
diff --git a/engine/test/components/test_function_calling.cc b/engine/test/components/test_function_calling.cc
deleted file mode 100644
index 7a4810b29..000000000
--- a/engine/test/components/test_function_calling.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-#include
-#include "gtest/gtest.h"
-#include "json/json.h"
-#include "utils/function_calling/common.h"
-
-class FunctionCallingUtilsTest : public ::testing::Test {
- protected:
-  std::shared_ptr<Json::Value> createTestRequest() {
-    auto request = std::make_shared<Json::Value>();
- (*request)["tools"] = Json::Value(Json::arrayValue);
- return request;
- }
-};
-
-TEST_F(FunctionCallingUtilsTest, ReplaceCustomFunctions) {
- std::string original = "Test placeholder";
- std::string replacement = "Custom function";
- std::string result =
- function_calling_utils::ReplaceCustomFunctions(original, replacement);
- EXPECT_EQ(result, "Test Custom function placeholder");
-}
-
-TEST_F(FunctionCallingUtilsTest, HasTools) {
- auto request = createTestRequest();
- EXPECT_FALSE(function_calling_utils::HasTools(request));
-
- (*request)["tools"].append(Json::Value());
- EXPECT_TRUE(function_calling_utils::HasTools(request));
-
- (*request)["tools"] = "random";
- EXPECT_FALSE(function_calling_utils::HasTools(request));
-
- (*request)["tools"] = Json::Value::null;
- EXPECT_FALSE(function_calling_utils::HasTools(request));
-}
-
-TEST_F(FunctionCallingUtilsTest, ProcessTools) {
- auto request = createTestRequest();
- Json::Value tool;
- tool["type"] = "function";
- tool["function"]["name"] = "test_function";
- tool["function"]["description"] = "Test description";
- (*request)["tools"].append(tool);
-
- std::string result = function_calling_utils::ProcessTools(request);
- EXPECT_TRUE(
- result.find("Use the function 'test_function' to: Test description") !=
- std::string::npos);
-}
-
-TEST_F(FunctionCallingUtilsTest, ParseMultipleFunctionStrings) {
- std::string input =
- "{\"arg\":\"value1\"}"
- "function>{\"arg\":\"value2\"}";
- Json::Value result =
- function_calling_utils::ParseMultipleFunctionStrings(input);
-
- ASSERT_EQ(result.size(), 2);
- EXPECT_EQ(result[0]["function"]["name"].asString(), "func1");
- EXPECT_EQ(result[0]["function"]["arguments"].asString(),
- "{\"arg\":\"value1\"}");
- EXPECT_EQ(result[1]["function"]["name"].asString(), "func2");
- EXPECT_EQ(result[1]["function"]["arguments"].asString(),
- "{\"arg\":\"value2\"}");
-}
-
-TEST_F(FunctionCallingUtilsTest, ConvertJsonToFunctionStrings) {
- Json::Value jsonArray(Json::arrayValue);
- Json::Value function1, function2;
- function1["function"]["name"] = "func1";
- function1["function"]["arguments"] = "{\"arg\":\"value1\"}";
- function2["function"]["name"] = "func2";
- function2["function"]["arguments"] = "{\"arg\":\"value2\"}";
- jsonArray.append(function1);
- jsonArray.append(function2);
-
- std::string result =
- function_calling_utils::ConvertJsonToFunctionStrings(jsonArray);
- EXPECT_EQ(result,
- "{\"arg\":\"value1\"}"
- "function>{\"arg\":\"value2\"}");
-}
-
-TEST_F(FunctionCallingUtilsTest, CreateCustomFunctionsString) {
- auto request = createTestRequest();
- Json::Value tool;
- tool["type"] = "function";
- tool["function"]["name"] = "test_function";
- tool["function"]["description"] = "Test description";
- (*request)["tools"].append(tool);
-
- std::string result =
- function_calling_utils::CreateCustomFunctionsString(request);
- EXPECT_TRUE(result.find("```") != std::string::npos);
- EXPECT_TRUE(
- result.find("Use the function 'test_function' to: Test description") !=
- std::string::npos);
-}
-
-TEST_F(FunctionCallingUtilsTest, IsValidToolChoiceFormat) {
- Json::Value validTool;
- validTool["type"] = "function";
- validTool["function"]["name"] = "test_function";
- EXPECT_TRUE(function_calling_utils::IsValidToolChoiceFormat(validTool));
-
- Json::Value invalidTool;
- EXPECT_FALSE(function_calling_utils::IsValidToolChoiceFormat(invalidTool));
-}
-
-TEST_F(FunctionCallingUtilsTest, UpdateMessages) {
- auto request = createTestRequest();
- std::string system_prompt = "Original prompt";
- (*request)["messages"] = Json::Value(Json::arrayValue);
-
- function_calling_utils::UpdateMessages(system_prompt, request);
-
- ASSERT_TRUE((*request)["messages"].isArray());
- EXPECT_EQ((*request)["messages"][0]["role"].asString(), "system");
- EXPECT_EQ((*request)["messages"][0]["content"].asString(), system_prompt);
-}
-
-TEST_F(FunctionCallingUtilsTest, PreprocessRequest) {
- auto request = createTestRequest();
- Json::Value tool;
- tool["type"] = "function";
- tool["function"]["name"] = "test_function";
- tool["function"]["description"] = "Test description";
- (*request)["tools"].append(tool);
-
- function_calling_utils::PreprocessRequest(request);
-
- ASSERT_TRUE((*request)["messages"].isArray());
- EXPECT_TRUE((*request)["messages"][0]["content"].asString().find(
- "Test description") != std::string::npos);
-}
-
-TEST_F(FunctionCallingUtilsTest, PostProcessResponse) {
- Json::Value response;
- response["choices"] = Json::Value(Json::arrayValue);
- Json::Value choice;
- choice["message"]["content"] =
- "{\"arg\":\"value\"}";
- response["choices"].append(choice);
-
- function_calling_utils::PostProcessResponse(response);
-
- EXPECT_EQ(response["choices"][0]["message"]["content"].asString(), "");
- EXPECT_TRUE(response["choices"][0]["message"]["tool_calls"].isArray());
- EXPECT_EQ(
- response["choices"][0]["message"]["tool_calls"][0]["function"]["name"]
- .asString(),
- "test_function");
- EXPECT_EQ(response["choices"][0]["message"]["tool_calls"][0]["function"]
- ["arguments"]
- .asString(),
- "{\"arg\":\"value\"}");
-}
\ No newline at end of file
diff --git a/engine/test/components/test_github_release_utils.cc b/engine/test/components/test_github_release_utils.cc
index ae1e2c7c2..20c14b187 100644
--- a/engine/test/components/test_github_release_utils.cc
+++ b/engine/test/components/test_github_release_utils.cc
@@ -4,16 +4,16 @@
class GitHubReleaseUtilsTest : public ::testing::Test {};
TEST_F(GitHubReleaseUtilsTest, AbleToGetReleaseByVersion) {
- auto version{"v0.1.36"};
+ auto version{"b4920"};
auto result = github_release_utils::GetReleaseByVersion(
- "menloresearch", "cortex.llamacpp", version);
+ kMenloOrg, "llama.cpp", version);
ASSERT_TRUE(result.has_value());
ASSERT_EQ(result->tag_name, version);
}
TEST_F(GitHubReleaseUtilsTest, AbleToGetReleaseList) {
- auto result = github_release_utils::GetReleases("menloresearch", "cortex.llamacpp");
+ auto result = github_release_utils::GetReleases(kMenloOrg, "llama.cpp");
ASSERT_TRUE(result.has_value());
ASSERT_TRUE(result->size() > 0);
diff --git a/engine/test/components/test_string_utils.cc b/engine/test/components/test_string_utils.cc
index 42211b668..e12046136 100644
--- a/engine/test/components/test_string_utils.cc
+++ b/engine/test/components/test_string_utils.cc
@@ -288,6 +288,47 @@ TEST_F(StringUtilsTestSuite, LargeInputPerformance) {
EXPECT_EQ(RemoveSubstring(large_input, to_remove), "");
}
+TEST(LTrimTest, EmptyString) {
+ std::string s = "";
+ LTrim(s);
+ EXPECT_EQ(s, "");
+}
+
+TEST(LTrimTest, NoSpaces) {
+ std::string s = "HelloWorld";
+ LTrim(s);
+ EXPECT_EQ(s, "HelloWorld");
+}
+
+TEST(LTrimTest, LeadingSpaces) {
+ std::string s = " HelloWorld";
+ LTrim(s);
+ EXPECT_EQ(s, "HelloWorld");
+}
+
+TEST(LTrimTest, LeadingTabs) {
+ std::string s = "\t\tHelloWorld";
+ LTrim(s);
+ EXPECT_EQ(s, "HelloWorld");
+}
+
+TEST(LTrimTest, LeadingNewlines) {
+ std::string s = "\n\nHelloWorld";
+ LTrim(s);
+ EXPECT_EQ(s, "HelloWorld");
+}
+
+TEST(LTrimTest, OnlySpaces) {
+ std::string s = " ";
+ LTrim(s);
+ EXPECT_EQ(s, "");
+}
+
+TEST(LTrimTest, MixedSpaces) {
+ std::string s = " \t\nHelloWorld ";
+ LTrim(s);
+ EXPECT_EQ(s, "HelloWorld ");
+}
TEST_F(StringUtilsTestSuite, UrlPaths_SimilarStrings) {
std::string str1 = "/v1/threads/{1}/messages/{2}";
diff --git a/engine/utils/cli_selection_utils.h b/engine/utils/cli_selection_utils.h
index dca6fe675..487c21e6b 100644
--- a/engine/utils/cli_selection_utils.h
+++ b/engine/utils/cli_selection_utils.h
@@ -27,13 +27,13 @@ inline void PrintMenu(
inline std::optional<int> GetNumericValue(const std::string& sval) {
try {
- return std::stoi(sval);
+ return std::stoi(sval);
} catch (const std::invalid_argument&) {
- // Not a valid number
- return std::nullopt;
+ // Not a valid number
+ return std::nullopt;
} catch (const std::out_of_range&) {
- // Number out of range
- return std::nullopt;
+ // Number out of range
+ return std::nullopt;
}
}
@@ -73,14 +73,16 @@ inline std::optional PrintModelSelection(
}
// Validate if the selection consists solely of numeric characters
- if(!std::all_of(selection.begin(), selection.end(), ::isdigit)){
+ if (!std::all_of(selection.begin(), selection.end(), ::isdigit)) {
return std::nullopt;
}
// deal with out of range numeric values
  std::optional<int> numeric_value = GetNumericValue(selection);
-
- if (!numeric_value.has_value() || (unsigned) numeric_value.value() > availables.size() || numeric_value.value() < 1) {
+
+ if (!numeric_value.has_value() ||
+ (unsigned)numeric_value.value() > availables.size() ||
+ numeric_value.value() < 1) {
return std::nullopt;
}
@@ -101,13 +103,15 @@ inline std::optional PrintSelection(
}
// Validate if the selection consists solely of numeric characters
- if(!std::all_of(selection.begin(), selection.end(), ::isdigit)){
+ if (!std::all_of(selection.begin(), selection.end(), ::isdigit)) {
return std::nullopt;
}
-
+
// deal with out of range numeric values
  std::optional<int> numeric_value = GetNumericValue(selection);
- if (!numeric_value.has_value() ||(unsigned) numeric_value.value() > options.size() || numeric_value.value() < 1) {
+ if (!numeric_value.has_value() ||
+ (unsigned)numeric_value.value() > options.size() ||
+ numeric_value.value() < 1) {
return std::nullopt;
}
diff --git a/engine/utils/cuda_toolkit_utils.h b/engine/utils/cuda_toolkit_utils.h
index 748af1bd3..e7aadfdd6 100644
--- a/engine/utils/cuda_toolkit_utils.h
+++ b/engine/utils/cuda_toolkit_utils.h
@@ -7,32 +7,7 @@ inline std::string GetCompatibleCudaToolkitVersion(
const std::string& driver_semantic_version, const std::string& os,
const std::string& engine) {
- if (engine == "cortex.tensorrt-llm") {
- // if the engine is cortex.tensorrt-llm, the minimum required CUDA version is 12.4
- if (os == "windows") {
- if (semantic_version_utils::CompareSemanticVersion(
- driver_semantic_version, "527.41") >= 0) {
- return "12.4";
- } else {
- throw std::runtime_error(
- "GPU driver version not supported. Minimum "
- "required driver version is 527.41");
- }
- } else if (os == "linux") {
- if (semantic_version_utils::CompareSemanticVersion(
- driver_semantic_version, "525.60.13") >= 0) {
- return "12.4";
- } else {
- throw std::runtime_error(
- "GPU driver version not supported. Minimum required driver version "
- "is 525.60.13");
- }
- } else {
- throw std::runtime_error("Unsupported OS");
- }
- }
-
- if (os == "windows") {
+ if (os == "windows" || os == "win") {
if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version,
"527.41") >= 0) {
return "12.4";
@@ -44,7 +19,7 @@ inline std::string GetCompatibleCudaToolkitVersion(
"GPU driver version not supported. Minimum "
"required driver version is 452.39");
}
- } else if (os == "linux") {
+ } else if (os == "linux" || os == "ubuntu") {
if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version,
"525.60.13") >= 0) {
return "12.4";
diff --git a/engine/utils/dylib_path_manager.cc b/engine/utils/dylib_path_manager.cc
index 7c389df06..878620185 100644
--- a/engine/utils/dylib_path_manager.cc
+++ b/engine/utils/dylib_path_manager.cc
@@ -26,7 +26,7 @@ cpp::result DylibPathManager::RegisterPath(
}
return cpp::fail("Failed to add DLL directory: " + path.string());
} else {
- CTL_DBG("Added DLL directory: " << path.string());
+ CTL_INF("Added DLL directory: " << path.string());
}
dylib_paths.push_back({path, cookie});
diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h
index 2c5cd1be3..695afb4c5 100644
--- a/engine/utils/engine_constants.h
+++ b/engine/utils/engine_constants.h
@@ -5,20 +5,23 @@ constexpr const auto kLlamaEngine = "llama-cpp";
constexpr const auto kRemote = "remote";
constexpr const auto kLocal = "local";
+constexpr const auto kLlamaRepo = "llama.cpp";
+constexpr const auto kLlamaLibPath = "./engines/llama.cpp";
+constexpr const auto kLlamaServer = "llama-server";
-constexpr const auto kLlamaRepo = "cortex.llamacpp";
-
-constexpr const auto kLlamaLibPath = "./engines/cortex.llamacpp";
+constexpr const auto kMenloOrg = "menloresearch";
+constexpr const auto kGgmlOrg = "ggml-org";
// other constants
constexpr auto static kHuggingFaceHost = "huggingface.co";
constexpr auto static kGitHubHost = "api.github.com";
constexpr auto static kCortexFolderName = "cortexcpp";
-constexpr auto static kDefaultGHUserAgent = "menloresearch";
+constexpr auto static kDefaultGHUserAgent = kMenloOrg;
-constexpr auto static kWindowsOs = "windows";
+constexpr auto static kWindowsOs = "win";
constexpr auto static kMacOs = "mac";
constexpr auto static kLinuxOs = "linux";
+constexpr auto static kUbuntuOs = "ubuntu";
constexpr auto static kUnsupportedOs = "Unsupported OS";
constexpr auto static kCurlGetTimeout = 10;
diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h
index 0b0cb26be..1afdd194c 100644
--- a/engine/utils/engine_matcher_utils.h
+++ b/engine/utils/engine_matcher_utils.h
@@ -7,6 +7,7 @@
#include
#include
#include "utils/cpuid/cpu_info.h"
+#include "utils/engine_constants.h"
#include "utils/logging_utils.h"
#include "utils/result.hpp"
#include "utils/string_utils.h"
@@ -24,13 +25,19 @@ inline cpp::result GetVariantFromNameAndVersion(
if (engine.empty()) {
return cpp::fail("Engine name is empty");
}
- auto nv = string_utils::RemoveSubstring(version, "v");
- using namespace string_utils;
- auto removed_extension = RemoveSubstring(engine_file_name, ".tar.gz");
- auto version_and_variant = RemoveSubstring(removed_extension, engine + "-");
-
- auto variant = RemoveSubstring(version_and_variant, nv + "-");
- return variant;
+ CTL_DBG("version: " << version);
+ namespace su = string_utils;
+ CTL_DBG("engine_file_name: " << engine_file_name);
+ auto rm_extension_menlo = su::RemoveSubstring(engine_file_name, ".tar.gz");
+ auto rm_extension_ggml = su::RemoveSubstring(rm_extension_menlo, ".zip");
+ CTL_DBG("removed_extension: " << rm_extension_ggml);
+ auto version_and_variant =
+ su::RemoveSubstring(rm_extension_ggml, engine + "-");
+ CTL_DBG("version_and_variant: " << version_and_variant);
+ auto variant = su::RemoveSubstring(version_and_variant, version + "-");
+ auto v = su::RemoveSubstring(variant, "llama-bin-");
+ CTL_DBG("variant: " << v);
+ return v;
}
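
A quick worked example of the removals above, using an assumed ggml-org asset name; the local RemoveSubstring below is only a stand-in for string_utils::RemoveSubstring:

#include <iostream>
#include <string>

// Stand-in for string_utils::RemoveSubstring: erase every occurrence of sub from s.
std::string RemoveSubstring(std::string s, const std::string& sub) {
  for (auto pos = s.find(sub); pos != std::string::npos; pos = s.find(sub)) {
    s.erase(pos, sub.size());
  }
  return s;
}

int main() {
  const std::string file_name = "llama-b4920-bin-win-avx2-x64.tar.gz";  // assumed asset name
  const std::string engine = "llama.cpp";
  const std::string version = "b4920";
  auto s = RemoveSubstring(file_name, ".tar.gz");
  s = RemoveSubstring(s, ".zip");
  s = RemoveSubstring(s, engine + "-");   // no-op here: "llama.cpp-" never appears
  s = RemoveSubstring(s, version + "-");  // "llama-bin-win-avx2-x64"
  s = RemoveSubstring(s, "llama-bin-");
  std::cout << s << "\n";  // win-avx2-x64
  return 0;
}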
inline std::string GetSuitableAvxVariant(cortex::cpuid::CpuInfo& cpu_info) {
@@ -48,7 +55,7 @@ inline std::string GetSuitableAvxVariant(cortex::cpuid::CpuInfo& cpu_info) {
inline std::string GetSuitableCudaVariant(
    const std::vector<std::string>& variants, const std::string& cuda_version) {
- std::regex cuda_reg("cuda-(\\d+)-(\\d+)");
+ std::regex cuda_reg("cuda-cu(\\d+).(\\d+)");
std::smatch match;
int requested_major = 0;
@@ -141,8 +148,9 @@ inline std::string Validate(const std::vector& variants,
const std::string& os, const std::string& cpu_arch,
const std::string& suitable_avx,
const std::string& cuda_version) {
+ // CTL_INF(os << " " << cpu_arch);
// Early return if the OS is not supported
- if (os != "mac" && os != "windows" && os != "linux") {
+ if (os != kMacOs && os != kWindowsOs && os != kLinuxOs) {
return "";
}
@@ -150,6 +158,12 @@ inline std::string Validate(const std::vector& variants,
std::copy_if(variants.begin(), variants.end(),
std::back_inserter(os_and_arch_compatible_list),
[&os, &cpu_arch](const std::string& variant) {
+                          // On Linux, also accept ubuntu-named builds
+ if (os == kLinuxOs) {
+ if (variant.find(kUbuntuOs) != std::string::npos &&
+ variant.find(cpu_arch) != std::string::npos)
+ return true;
+ }
auto os_match = "-" + os;
auto cpu_arch_match = "-" + cpu_arch;
@@ -157,10 +171,10 @@ inline std::string Validate(const std::vector& variants,
variant.find(cpu_arch_match) != std::string::npos;
});
- if (os == "mac" && !os_and_arch_compatible_list.empty())
+ if (os == kMacOs && !os_and_arch_compatible_list.empty())
return os_and_arch_compatible_list[0];
- if (os == "linux" && cpu_arch == "arm64" &&
+ if (os == kLinuxOs && cpu_arch == "arm64" &&
!os_and_arch_compatible_list.empty()) {
return os_and_arch_compatible_list[0];
}
@@ -170,7 +184,14 @@ inline std::string Validate(const std::vector& variants,
std::copy_if(os_and_arch_compatible_list.begin(),
os_and_arch_compatible_list.end(),
std::back_inserter(avx_compatible_list),
- [&suitable_avx](const std::string& variant) {
+ [&os, &cpu_arch, &suitable_avx](const std::string& variant) {
+ if (os == kLinuxOs &&
+ (suitable_avx == "avx2" || suitable_avx == "avx512" ||
+ cpu_arch == "arm64")) {
+ if (variant.find(std::string(kUbuntuOs) + "-" + cpu_arch) !=
+ std::string::npos)
+ return true;
+ }
auto suitable_avx_match = "-" + suitable_avx;
return variant.find(suitable_avx_match) != std::string::npos;
@@ -185,15 +206,18 @@ inline std::string Validate(const std::vector& variants,
inline std::pair GetVersionAndArch(
const std::string& file_name) {
// Remove the file extension
- std::string base = file_name.substr(0, file_name.find("tar") - 1);
+ std::string b = string_utils::RemoveSubstring(file_name, ".tar.gz");
+ std::string base = string_utils::RemoveSubstring(b, ".zip");
size_t arch_pos = 0;
- if (base.find("windows") != std::string::npos) {
- arch_pos = base.find("-windows");
+ if (base.find("win") != std::string::npos) {
+ arch_pos = base.find("-bin-win");
} else if (base.find("linux") != std::string::npos) {
- arch_pos = base.find("-linux");
+ arch_pos = base.find("-bin-linux");
+ } else if (base.find("ubuntu") != std::string::npos) {
+ arch_pos = base.find("-bin-ubuntu");
} else {
- arch_pos = base.find("-mac");
+ arch_pos = base.find("-bin-macos");
}
// Extract architecture part
@@ -202,6 +226,6 @@ inline std::pair GetVersionAndArch(
// Extract version part
size_t v_pos = base.find_first_of('-');
auto version = base.substr(v_pos + 1, arch_pos - v_pos - 1);
- return std::pair("v" + version, arch);
+ return std::pair(version, string_utils::RemoveSubstring(arch, "bin-"));
}
} // namespace engine_matcher_utils
diff --git a/engine/utils/function_calling/common.h b/engine/utils/function_calling/common.h
index 34a1c9862..953a9964c 100644
--- a/engine/utils/function_calling/common.h
+++ b/engine/utils/function_calling/common.h
@@ -129,157 +129,4 @@ inline Json::Value ParseJsonString(const std::string& jsonString) {
return root;
}
-inline std::string CreateCustomFunctionsString(
-    std::shared_ptr<Json::Value> request) {
- std::string customFunctions = ProcessTools(request);
- if (customFunctions.empty()) {
- return ""; // No custom functions found
- }
-
- return "```\n" + customFunctions + "```";
-}
-inline bool IsValidToolChoiceFormat(const Json::Value& root) {
- return root.isObject() && root.isMember("type") && root["type"].isString() &&
- root["type"].asString() == "function" && root.isMember("function") &&
- root["function"].isObject() && root["function"].isMember("name") &&
- root["function"]["name"].isString();
-}
-inline void UpdateMessages(std::string& system_prompt,
-                           std::shared_ptr<Json::Value> request) {
- Json::Value tool_choice = request->get("tool_choice", "auto");
- if (tool_choice.isString() && tool_choice.asString() == "required") {
- system_prompt +=
- "\n\nYou must call a function to answer the user's question.";
- } else if (!tool_choice.isString()) {
-
- system_prompt +=
- "\n\nNow this is your first priority: You must call the function '" +
- tool_choice["function"]["name"].asString() +
- "' to answer the user's question.";
- }
- bool parallel_tool_calls = request->get("parallel_tool_calls", true).asBool();
- if (!parallel_tool_calls) {
- system_prompt += "\n\nNow this is your first priority: You must call the only one function at a time.";
- }
-
- bool tools_call_in_user_message =
- request->get("tools_call_in_user_message", false).asBool();
-
- bool original_stream_config = (*request).get("stream", false).asBool();
- // (*request)["grammar"] = function_calling_utils::gamma_json;
- (*request)["stream"] =
- false; //when using function calling, disable stream automatically because we need to parse the response to get function name and params
-
- if (!request->isMember("messages") || !(*request)["messages"].isArray() ||
- (*request)["messages"].empty()) {
- // If no messages, add the system prompt as the first message
- Json::Value systemMessage;
- systemMessage["role"] = "system";
- systemMessage["content"] = system_prompt;
- (*request)["messages"].append(systemMessage);
- } else {
-
- if (tools_call_in_user_message) {
- for (Json::Value& message : (*request)["messages"]) {
- if (message["role"] == "user" && message.isMember("tools") &&
- message["tools"].isArray() && message["tools"].size() > 0) {
- message["content"] = system_prompt + "\n User question: " +
- message["content"].asString();
- }
- }
- } else {
- Json::Value& firstMessage = (*request)["messages"][0];
- if (firstMessage["role"] == "system") {
- bool addCustomPrompt =
- request->get("add_custom_system_prompt", true).asBool();
- if (addCustomPrompt) {
- firstMessage["content"] =
- system_prompt + "\n" + firstMessage["content"].asString();
- }
- } else {
- // If the first message is not a system message, prepend the system prompt
- Json::Value systemMessage;
- systemMessage["role"] = "system";
- systemMessage["content"] = system_prompt;
- (*request)["messages"].insert(0, systemMessage);
- }
- }
-
- // transform last message role to tool if it is a function call
- Json::Value& lastMessage =
- (*request)["messages"][(*request)["messages"].size() - 1];
- if (lastMessage.get("role", "") == "tool") {
- lastMessage["role"] = function_calling_llama3_1_utils::tool_role;
- (*request)["stream"] =
- original_stream_config; // if role is tool then should restore stream config to original value
- }
- }
- for (Json::Value& message : (*request)["messages"]) {
- if (message["role"] == "assistant" && message.isMember("tool_calls")) {
- const Json::Value& tool_calls = message["tool_calls"];
- if (!tool_calls.isNull() && tool_calls.isArray() &&
- tool_calls.size() > 0) {
- message["content"] = ConvertJsonToFunctionStrings(tool_calls);
- message["tool_calls"] = {};
- }
- }
- }
-}
-inline void PreprocessRequest(std::shared_ptr<Json::Value> request) {
- if (!function_calling_utils::HasTools(request)) {
- return; // Exit if no tools present
- }
- if (request->get("tool_choice", "auto").isString()) {
- std::string tool_choice = request->get("tool_choice", "auto").asString();
- if (tool_choice == "none") {
- return; // Exit if tool_choice is none
- }
- }
- std::string customFunctionsString =
- function_calling_utils::CreateCustomFunctionsString(request);
- std::string new_system_prompt =
- function_calling_utils::ReplaceCustomFunctions(
- function_calling_llama3_1_utils::system_prompt,
- customFunctionsString);
- UpdateMessages(new_system_prompt, request);
-}
-
-inline void PostProcessResponse(Json::Value& response) {
- if (!response.isMember("choices") || !response["choices"].isArray() ||
- response["choices"].empty()) {
- // If there are no choices or the structure is incorrect, do nothing
- return;
- }
-
- // Get a reference to the first choice
- Json::Value& firstChoice = response["choices"][0];
-
- // Check if the choice has a message with content
- if (firstChoice.isMember("message") &&
- firstChoice["message"].isMember("content")) {
- std::string content = firstChoice["message"]["content"].asString();
-
- // Create a new structure for tool_calls
- Json::Value toolCall = ParseMultipleFunctionStrings(content);
- if (toolCall.size() > 0) {
- // Add tool_calls to the message
- if (response.get("tool_choice", "auto").isString()) {
- std::string tool_choice =
- response.get("tool_choice", "auto").asString();
- if (tool_choice == "auto") {
- firstChoice["finish_reason"] = "tool_calls";
- } else {
- firstChoice["finish_reason"] = "stop";
- }
- }
-
- firstChoice["message"]["tool_calls"] = toolCall;
-
- // Clear the content as it's now represented in tool_calls
- firstChoice["message"]["content"] = "";
- }
- }
-
- // Add any additional post-processing logic here
-}
} // namespace function_calling_utils
diff --git a/engine/utils/github_release_utils.h b/engine/utils/github_release_utils.h
index 29f8a5725..84636903a 100644
--- a/engine/utils/github_release_utils.h
+++ b/engine/utils/github_release_utils.h
@@ -178,11 +178,6 @@ inline cpp::result<GitHubRelease, std::string> GetReleaseByVersion(
std::vector<std::string> path_params{"repos", author, repo, "releases"};
if (tag != "latest") {
path_params.push_back("tags");
-
- if (!string_utils::StartsWith(tag, "v")) {
- path_params.push_back("v" + tag);
- }
-
path_params.push_back(tag);
} else {
path_params.push_back("latest");
diff --git a/engine/utils/process/utils.cc b/engine/utils/process/utils.cc
index f63de5c5e..c9ccddfdf 100644
--- a/engine/utils/process/utils.cc
+++ b/engine/utils/process/utils.cc
@@ -347,7 +347,7 @@ bool KillProcess(ProcessInfo& proc_info) {
bool success;
#if defined(_WIN32)
- success = TerminateJobObject(proc_info.hJob, 0) == 0;
+ success = TerminateJobObject(proc_info.hJob, 0);
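+ // TerminateJobObject returns a non-zero BOOL on success, so its result is assigned directly; the previous `== 0` comparison inverted the meaning.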
#elif defined(__APPLE__) || defined(__linux__)
// we send SIGTERM to subprocess. we trust that this subprocess will
// propagate SIGTERM correctly to its children processes.
diff --git a/engine/utils/string_utils.h b/engine/utils/string_utils.h
index a9ea756b3..e1a567942 100644
--- a/engine/utils/string_utils.h
+++ b/engine/utils/string_utils.h
@@ -22,6 +22,12 @@ inline std::string RTrim(const std::string& str) {
return (end == std::string::npos) ? "" : str.substr(0, end + 1);
}
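+// Trims leading whitespace in place, complementing the RTrim above and Trim below.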
+inline void LTrim(std::string& s) {
+ s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
+ return !std::isspace(ch);
+ }));
+}
+
inline void Trim(std::string& s) {
s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
return !std::isspace(ch);
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index 54eaed8c9..9bef6f4f9 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -70,7 +70,7 @@ inline std::unique_ptr<SystemInfo> GetSystemInfo() {
#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || \
defined(__amd64) || defined(__x86_64) || defined(_M_AMD64)
- arch << "amd64";
+ arch << "x64";
#elif defined(__arm__) || defined(__arm) || defined(__arm64__) || \
defined(__aarch64__) || defined(__thumb__) || \
defined(__TARGET_ARCH_ARM) || defined(__TARGET_ARCH_THUMB) || \
diff --git a/function-calling.py b/function-calling.py
new file mode 100644
index 000000000..32ef31752
--- /dev/null
+++ b/function-calling.py
@@ -0,0 +1,173 @@
+from datetime import datetime
+from openai import OpenAI
+from pydantic import BaseModel
+import json
+
+# MODEL = "deepseek-r1-distill-qwen-7b:7b"
+MODEL = "llama3.1:8b-q8"
+
+client = OpenAI(
+ base_url="http://localhost:39281/v1",
+ api_key="not-needed", # Authentication is not required for local deployment
+)
+
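+# Tool definitions below follow the OpenAI function-calling schema: each entry is a
+# "function" object with a name, a description, and a JSON-Schema "parameters" block
+# whose "required" list names the mandatory arguments.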
+tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_navigate",
+ "description": "Navigate to a URL",
+ "parameters": {
+ "properties": {"url": {"type": "string"}},
+ "required": ["url"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_screenshot",
+ "description": "Take a screenshot of the current page or a specific element",
+ "parameters": {
+ "properties": {
+ "height": {
+ "description": "Height in pixels (default: 600)",
+ "type": "number",
+ },
+ "name": {
+ "description": "Name for the screenshot",
+ "type": "string",
+ },
+ "selector": {
+ "description": "CSS selector for element to screenshot",
+ "type": "string",
+ },
+ "width": {
+ "description": "Width in pixels (default: 800)",
+ "type": "number",
+ },
+ },
+ "required": ["name"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_click",
+ "description": "Click an element on the page",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for element to click",
+ "type": "string",
+ }
+ },
+ "required": ["selector"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_fill",
+ "description": "Fill out an input field",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for input field",
+ "type": "string",
+ },
+ "value": {"description": "Value to fill", "type": "string"},
+ },
+ "required": ["selector", "value"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_select",
+ "description": "Select an element on the page with Select tag",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for element to select",
+ "type": "string",
+ },
+ "value": {"description": "Value to select", "type": "string"},
+ },
+ "required": ["selector", "value"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_hover",
+ "description": "Hover an element on the page",
+ "parameters": {
+ "properties": {
+ "selector": {
+ "description": "CSS selector for element to hover",
+ "type": "string",
+ }
+ },
+ "required": ["selector"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "puppeteer_evaluate",
+ "description": "Execute JavaScript in the browser console",
+ "parameters": {
+ "properties": {
+ "script": {
+ "description": "JavaScript code to execute",
+ "type": "string",
+ }
+ },
+ "required": ["script"],
+ "type": "object",
+ },
+ "strict": False,
+ },
+ },
+]
+
+completion_payload = {
+ "messages": [
+ {
+ "role": "system",
+ "content": 'You have access to the following CUSTOM functions:\n\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+ },
+ {
+ "role": "user",
+ "content": "go to google search",
+ },
+ ]
+}
+
+response = client.chat.completions.create(
+ top_p=0.9,
+ temperature=0.6,
+ model=MODEL,
+ messages=completion_payload["messages"],
+ tools=tools,
+)
+
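+# If the model chooses a tool, an OpenAI-style response is expected to carry the parsed
+# call(s) in response.choices[0].message.tool_calls (with "content" left empty); otherwise
+# the plain answer appears in response.choices[0].message.content.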
+print(response)
\ No newline at end of file