From 4f50812026f1675f1b74e68a756dd888eb534021 Mon Sep 17 00:00:00 2001 From: Thien Tran Date: Wed, 19 Mar 2025 08:48:45 +0800 Subject: [PATCH 1/4] chore: remove python engine (#2146) * remove python engine * remove docs * remove old methods * remove CI * remove model config * remove finetuning and some docs update * remove finetuning --- .github/workflows/python-script-package.yml | 72 -- .github/workflows/python-venv-package.yml | 275 ------ docs/docs/architecture.mdx | 1 - docs/docs/basic-usage/index.mdx | 3 +- docs/docs/capabilities/models/index.mdx | 2 - docs/docs/engines/python-engine.mdx | 246 ----- docs/sidebars.ts | 3 - engine/CMakeLists.txt | 1 - engine/cli/CMakeLists.txt | 3 +- engine/common/base.h | 3 - engine/common/download_task.h | 13 +- engine/config/model_config.h | 337 ------- engine/controllers/models.cc | 34 - engine/controllers/server.cc | 89 -- engine/controllers/server.h | 13 - engine/cortex-common/EngineI.h | 8 - engine/cortex-common/cortexpythoni.h | 22 - .../extensions/python-engine/python_engine.cc | 899 ------------------ .../extensions/python-engine/python_engine.h | 110 --- engine/services/engine_service.cc | 15 - engine/services/engine_service.h | 3 +- engine/services/inference_service.cc | 108 --- engine/services/inference_service.h | 8 - engine/services/model_service.cc | 125 +-- engine/utils/config_yaml_utils.h | 3 +- engine/utils/engine_constants.h | 3 - 26 files changed, 6 insertions(+), 2393 deletions(-) delete mode 100644 .github/workflows/python-script-package.yml delete mode 100644 .github/workflows/python-venv-package.yml delete mode 100644 docs/docs/engines/python-engine.mdx delete mode 100644 engine/cortex-common/cortexpythoni.h delete mode 100644 engine/extensions/python-engine/python_engine.cc delete mode 100644 engine/extensions/python-engine/python_engine.h diff --git a/.github/workflows/python-script-package.yml b/.github/workflows/python-script-package.yml deleted file mode 100644 index 5ea65be9c..000000000 --- 
a/.github/workflows/python-script-package.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: Build and Package Python Code - -on: - workflow_dispatch: - inputs: - model_dir: - description: "Path to model directory in github repo" - required: true - repo_name: - description: "name of repo to be checked out" - required: true - branch_name: - description: "name of branch to be checked out" - required: true - default: main - hf_repo: - description: "name of huggingface repo to be pushed" - required: true - hf_prefix_branch: - description: "prefix of hf branch" - required: false - -env: - MODEL_DIR: ${{ inputs.model_dir }} - REPO_NAME: ${{ inputs.repo_name}} - BRANCH_NAME: ${{ inputs.branch_name }} - HF_REPO: ${{ inputs.hf_repo }} - HF_PREFIX_BRANCH: ${{ inputs.hf_prefix_branch }} - -jobs: - build-and-test: - runs-on: ${{ matrix.runs-on }} - timeout-minutes: 3600 - strategy: - fail-fast: false - matrix: - include: - - os: "linux" - name: "amd64" - runs-on: "ubuntu-20-04-cuda-12-0" - - os: "mac" - name: "amd64" - runs-on: "macos-selfhosted-12" - - os: "mac" - name: "arm64" - runs-on: "macos-selfhosted-12-arm64" - - os: "windows" - name: "amd64" - runs-on: "windows-cuda-12-0" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v3 - with: - submodules: recursive - repository: ${{env.REPO_NAME}} - ref: ${{env.BRANCH_NAME}} - - name: use python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install hf-transfer huggingface_hub - - - name: Upload Artifact - run: | - huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential - cd ${{env.MODEL_DIR}} && huggingface-cli upload ${{env.HF_REPO}} . . 
--revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }} - huggingface-cli logout \ No newline at end of file diff --git a/.github/workflows/python-venv-package.yml b/.github/workflows/python-venv-package.yml deleted file mode 100644 index 8bed4eb97..000000000 --- a/.github/workflows/python-venv-package.yml +++ /dev/null @@ -1,275 +0,0 @@ -name: Build and Package Python Virtual Environment - -on: - workflow_dispatch: - inputs: - model_dir: - description: "Path to model directory in github repo" - required: true - model_name: - description: "name of model to be release" - required: true - repo_name: - description: "name of repo to be checked out" - required: true - branch_name: - description: "name of branch to be checked out" - required: true - default: main - hf_repo: - description: "name of huggingface repo to be pushed" - required: true - hf_prefix_branch: - description: "prefix of hf branch" - required: false - - - -env: - MODEL_DIR: ${{ inputs.model_dir }} - MODEL_NAME: ${{ inputs.model_name }} - REPO_NAME: ${{ inputs.repo_name }} - BRANCH_NAME: ${{ inputs.branch_name }} - HF_REPO: ${{ inputs.hf_repo }} - HF_PREFIX_BRANCH: ${{ inputs.hf_prefix_branch }} - -jobs: - build-and-test: - runs-on: ${{ matrix.runs-on }} - timeout-minutes: 3600 - strategy: - fail-fast: false - matrix: - include: - - os: "linux" - name: "amd64" - runs-on: "ubuntu-20-04-cuda-12-0" - - os: "mac" - name: "amd64" - runs-on: "macos-selfhosted-12" - - os: "mac" - name: "arm64" - runs-on: "macos-selfhosted-12-arm64" - - os: "windows" - name: "amd64" - runs-on: "windows-cuda-12-0" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v3 - with: - submodules: recursive - repository: ${{env.REPO_NAME}} - ref: ${{env.BRANCH_NAME}} - - uses: conda-incubator/setup-miniconda@v3 - if: runner.os != 'windows' - with: - auto-update-conda: true - python-version: 3.11 - - name: use python - if : runner.os == 'windows' - uses: actions/setup-python@v5 - with: - python-version: 
"3.11" - - - name: Get Cer for code signing - if: runner.os == 'macOS' - run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12 - shell: bash - env: - CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }} - - - uses: apple-actions/import-codesign-certs@v2 - continue-on-error: true - if: runner.os == 'macOS' - with: - p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }} - p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }} - - - name: Get Cer for code signing - if: runner.os == 'macOS' - run: base64 -d <<< "$NOTARIZE_P8_BASE64" > /tmp/notary-key.p8 - shell: bash - env: - NOTARIZE_P8_BASE64: ${{ secrets.NOTARIZE_P8_BASE64 }} - - - name: Install dependencies Windows - if: runner.os == 'windows' - shell: pwsh - run: | - python3 -m pip install fastapi - python3 -m pip freeze | % { python3 -m pip uninstall -y $_ } - python3 -m pip install --upgrade pip - python3 -m pip install -I -r ${{env.MODEL_DIR}}/requirements.cuda.txt - python3 -m pip install python-dotenv - - name: Install dependencies Linux - if: runner.os == 'linux' - run: | - conda create -y -n ${{env.MODEL_NAME}} python=3.11 - source $HOME/miniconda3/bin/activate base - conda init - conda activate ${{env.MODEL_NAME}} - python -m pip install fastapi - python -m pip freeze | xargs python -m pip uninstall -y - python -m pip install --upgrade pip - python -m pip install -r ${{env.MODEL_DIR}}/requirements.cuda.txt - python -m pip install python-dotenv - - name: Install dependencies Mac - if: runner.os == 'macOS' - run: | - conda create -y -n ${{env.MODEL_NAME}} python=3.11 - source $HOME/miniconda3/bin/activate base - conda init - conda activate ${{env.MODEL_NAME}} - python -m pip install fastapi - python -m pip freeze | xargs python -m pip uninstall -y - python -m pip install --upgrade pip - python -m pip install -r ${{env.MODEL_DIR}}/requirements.txt - python -m pip install python-dotenv - - - name: prepare python package windows - if : runner.os == 'windows' - shell: pwsh - run: | - $pythonPath = 
where.exe python - echo "Python path (where.exe): $pythonPath" - $pythonFolder = Split-Path -Path "$pythonPath" -Parent - echo "PYTHON_FOLDER=$pythonFolder" >> $env:GITHUB_ENV - copy "$pythonFolder\python*.*" "$pythonFolder\Scripts\" - - - name: prepare python package macos - if : runner.os == 'macOs' - run: | - source $HOME/miniconda3/bin/activate base - conda init - conda activate ${{env.MODEL_NAME}} - PYTHON_PATH=$(which python) - echo $PYTHON_PATH - PYTHON_FOLDER=$(dirname $(dirname "$PYTHON_PATH")) - echo "PYTHON_FOLDER=$PYTHON_FOLDER" >> $GITHUB_ENV - echo "github end PYTHON_FOLDER: ${{env.PYTHON_FOLDER}}" - - name: prepare python package linux - if : runner.os == 'linux' - run: | - source $HOME/miniconda3/bin/activate base - conda init - conda activate ${{env.MODEL_NAME}} - PYTHON_PATH=$(which python) - echo $PYTHON_PATH - PYTHON_FOLDER=$(dirname $(dirname "$PYTHON_PATH")) - rm -rf $PYTHON_FOLDER/lib/python3.1 - echo "PYTHON_FOLDER=$PYTHON_FOLDER" >> $GITHUB_ENV - echo "github end PYTHON_FOLDER: ${{env.PYTHON_FOLDER}}" - - - name: create plist file - if: runner.os == 'macOS' - run: | - cat << EOF > /tmp/entitlements.plist - - - - - - com.apple.security.cs.allow-jit - - com.apple.security.cs.allow-unsigned-executable-memory - - - - com.apple.security.app-sandbox - - com.apple.security.network.client - - com.apple.security.network.server - - com.apple.security.device.audio-input - - com.apple.security.device.microphone - - com.apple.security.device.camera - - com.apple.security.files.user-selected.read-write - - com.apple.security.cs.disable-library-validation - - com.apple.security.cs.allow-dyld-environment-variables - - com.apple.security.cs.allow-executable-memory - - - - EOF - - - name: Notary macOS Binary - if: runner.os == 'macOS' - run: | - codesign --force --entitlements="/tmp/entitlements.plist" -s "${{ secrets.DEVELOPER_ID }}" --options=runtime ${{env.PYTHON_FOLDER}}/bin/python - codesign --force --entitlements="/tmp/entitlements.plist" -s "${{ 
secrets.DEVELOPER_ID }}" --options=runtime ${{env.PYTHON_FOLDER}}/bin/python3 - # Code sign all .so files and .dylib files - - find ${{env.PYTHON_FOLDER}} -type f \( -name "*.so" -o -name "*.dylib" \) -exec codesign --force --entitlements="/tmp/entitlements.plist" -s "${{ secrets.DEVELOPER_ID }}" --options=runtime {} \; - - curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sudo sh -s -- -b /usr/local/bin - # Notarize the binary - quill notarize ${{env.PYTHON_FOLDER}}/bin/python - quill notarize ${{env.PYTHON_FOLDER}}/bin/python3 - find ${{env.PYTHON_FOLDER}} -type f \( -name "*.so" -o -name "*.dylib" \) -exec quill notarize {} \; - env: - QUILL_NOTARY_KEY_ID: ${{ secrets.NOTARY_KEY_ID }} - QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }} - QUILL_NOTARY_KEY: "/tmp/notary-key.p8" - - - - name: Upload Artifact MacOS - if : runner.os == 'macOS' - run: | - brew install zip - cd ${{env.PYTHON_FOLDER}} && zip -r venv.zip * - conda create -y -n hf-upload python=3.11 - source $HOME/miniconda3/bin/activate base - conda init - conda activate hf-upload - python -m pip install hf-transfer huggingface_hub - huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential - huggingface-cli upload ${{env.HF_REPO}} venv.zip --revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }} - rm -rf venv.zip - huggingface-cli logout - - - name: Upload Artifact Linux - if : runner.os == 'linux' - run: | - sudo apt-get install -y zip - cd ${{env.PYTHON_FOLDER}} && zip -r venv.zip * - conda create -y -n hf-upload python=3.11 - source $HOME/miniconda3/bin/activate base - conda init - conda activate hf-upload - python -m pip install hf-transfer huggingface_hub - huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential - huggingface-cli upload ${{env.HF_REPO}} venv.zip --revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }} - rm -rf venv.zip - huggingface-cli logout - - - - 
name: Upload Artifact Windows - if : runner.os == 'windows' - shell: pwsh - run: | - Compress-Archive -Path ${{env.PYTHON_FOLDER}}/* -DestinationPath venv.zip - python -m pip install hf-transfer huggingface_hub - huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential - huggingface-cli upload ${{env.HF_REPO}} venv.zip --revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }} - rm venv.zip - huggingface-cli logout - - - - name: Post Upload windows - if : runner.os == 'windows' - run: | - rm ${{env.PYTHON_FOLDER}}/Scripts/python*.* - - - name: Remove Keychain - continue-on-error: true - if: always() && runner.os == 'macOS' - run: | - security delete-keychain signing_temp.keychain diff --git a/docs/docs/architecture.mdx b/docs/docs/architecture.mdx index 8e9520810..cad463ca3 100644 --- a/docs/docs/architecture.mdx +++ b/docs/docs/architecture.mdx @@ -144,4 +144,3 @@ The sequence diagram above outlines the interactions between various components Our development roadmap outlines key features and epics we will focus on in the upcoming releases. These enhancements aim to improve functionality, increase efficiency, and expand Cortex's capabilities. - **RAG**: Improve response quality and contextual relevance in our AI models. -- **Cortex Python Runtime**: Provide a scalable Python execution environment for Cortex. diff --git a/docs/docs/basic-usage/index.mdx b/docs/docs/basic-usage/index.mdx index 837d78733..1aaac36be 100644 --- a/docs/docs/basic-usage/index.mdx +++ b/docs/docs/basic-usage/index.mdx @@ -35,8 +35,7 @@ curl --request DELETE \ ``` ## Engines -Cortex currently supports a general Python Engine for highly customised deployments and -2 specialized ones for different multi-modal foundation models: llama.cpp and ONNXRuntime. +Cortex currently supports 2 specialized ones for different multi-modal foundation models: llama.cpp and ONNXRuntime. 
By default, Cortex installs `llama.cpp` as it main engine as it can be used in most laptops, desktop environments and operating systems. diff --git a/docs/docs/capabilities/models/index.mdx b/docs/docs/capabilities/models/index.mdx index beda81e69..d33d46eae 100644 --- a/docs/docs/capabilities/models/index.mdx +++ b/docs/docs/capabilities/models/index.mdx @@ -22,8 +22,6 @@ Cortex supports three model formats and each model format require specific engin - GGUF - run with `llama-cpp` engine - ONNX - run with `onnxruntime` engine -Within the Python Engine (currently under development), you can run models in other formats - :::info For details on each format, see the [Model Formats](/docs/capabilities/models/model-yaml#model-formats) page. ::: diff --git a/docs/docs/engines/python-engine.mdx b/docs/docs/engines/python-engine.mdx deleted file mode 100644 index 5839a346c..000000000 --- a/docs/docs/engines/python-engine.mdx +++ /dev/null @@ -1,246 +0,0 @@ ---- -title: Python Engine -description: Interface for running Python processes through Cortex ---- - -:::warning -🚧 Cortex.cpp is currently under active development. Our documentation outlines the intended -behavior of Cortex, which may not yet be fully implemented in the codebase. -::: - -The Python Engine manages Python processes that run models via Cortex. Each Python program is treated as -a model with its own `model.yml` configuration template. All requests are routed through Cortex using HTTP. - -## Python Engine Implementation - -The Python Engine is implemented as a C++ package called [EngineI](/docs/engines/engine-extension). 
It exposes these core methods: - -- `LoadModel`: Starts Python process and loads model -- `UnloadModel`: Stops process and unloads model -- `GetModelStatus`: Health check for running processes -- `GetModels`: Lists active Python models - -Additional methods: -- `HandleInference`: Routes inference requests to Python process -- `HandleRouteRequest`: Routes arbitrary requests to Python process - -The Python Engine is built into Cortex.cpp and loads automatically when needed. - -## Model Configuration - -Each Python model requires a `model.yml` configuration file: - -```yaml -id: ichigo-0.5:fp16-linux-amd64 -model: ichigo-0.5:fp16-linux-amd64 -name: Ichigo Wrapper -version: 1 - -port: 22310 -script: src/app.py -log_path: ichigo-wrapper.log -log_level: INFO -command: - - python -files: - - /home/thuan/cortexcpp/models/cortex.so/ichigo-0.5/fp16-linux-amd64 -depends: - - ichigo-0.4:8b-gguf-q4-km - - whispervq:fp16-linux-amd64 - - fish-speech:fp16-linux-amd64 -engine: python-engine -extra_params: - device_id: 0 - fish_speech_port: 22312 - ichigo_model: ichigo-0.4:8b-gguf-q4-km - ichigo_port: 39281 - whisper_port: 3348 -``` - -| **Parameter** | **Description** | **Required** | -|-----------------|-----------------------------------------------------------------------------------------------------------|--------------| -| `id` | Unique identifier for the model, typically includes version and platform information. | Yes | -| `model` | Specifies the variant of the model, often denoting size or quantization details. | Yes | -| `name` | The human-readable name for the model, used as the `model_id`. | Yes | -| `version` | The specific version number of the model. | Yes | -| `port` | The network port on which the Python program will listen for requests. | Yes | -| `script` | Path to the main Python script to be executed by the engine. This is relative path to the model folder | Yes | -| `log_path` | File location where logs will be stored for the Python program's execution. 
log_path is relative path of cortex data folder | No | -| `log_level` | The level of logging detail (e.g., INFO, DEBUG). | No | -| `command` | The command used to launch the Python program, typically starting with 'python'. | Yes | -| `files` | For python models, the files is the path to folder contains all python scripts, model binary and environment to run the program | No | -| `depends` | Dependencies required by the model, specified by their identifiers. The dependencies are other models | No | -| `engine` | Specifies the engine to use, which in this context is 'python-engine'. | Yes | -| `extra_params` | Additional parameters passed to the Python script at runtime | No | - -## Example: Ichigo Python Model - -[Ichigo python](https://github.com/menloresearch/ichigo) is a built-in Cortex model for chat with audio support. - -### Required Models - -Ichigo requires these models: - -- ichigo-0.5 -- whispervq -- ichigo-0.4 -- fish-speech (optional, for text-to-speech) - -Download models for your platform (example for Linux AMD64): - -```sh -curl --location '127.0.0.1:39281/v1/models/pull' \ - --header 'Content-Type: application/json' \ - --data '{"model":"ichigo-0.5:fp16-linux-amd64"}' - -curl --location '127.0.0.1:39281/v1/models/pull' \ - --header 'Content-Type: application/json' \ - --data '{"model":"ichigo-0.4:8b-gguf-q4-km"}' - -curl --location '127.0.0.1:39281/v1/models/pull' \ - --header 'Content-Type: application/json' \ - --data '{"model":"whispervq:fp16-linux-amd64"}' - -curl --location '127.0.0.1:39281/v1/models/pull' \ - --header 'Content-Type: application/json' \ - --data '{"model":"fish-speech:fp16-linux-amd64"}' -``` - -### Model Management - -Start model: -```sh -curl --location '127.0.0.1:39281/v1/models/start' \ ---header 'Content-Type: application/json' \ ---data '{"model":"ichigo-0.5:fp16-linux-amd64"}' -``` - -Check status: -```sh -curl --location '127.0.0.1:39281/v1/models/status/fish-speech:fp16-linux-amd64' -``` - -Stop model: -```sh -curl 
--location '127.0.0.1:39281/v1/models/stop' \ ---header 'Content-Type: application/json' \ ---data '{"model":"ichigo-0.5:fp16-linux-amd64"}' -``` - -### Inference - -Example inference request: -```sh -curl --location '127.0.0.1:39281/v1/inference' \ ---header 'Content-Type: application/json' \ ---data '{ - "model":"ichigo-0.5:fp16-linux-amd64", - "engine":"python-engine", - "body":{ - "messages": [{ - "role":"system", - "content":"you are helpful assistant, you must answer questions short and concil!" - }], - "input_audio": { - "data": "base64_encoded_audio_data", - "format": "wav" - }, - "model": "ichigo-0.4:8b-gguf-q4km", - "stream": true, - "temperature": 0.7, - "top_p": 0.9, - "max_tokens": 2048, - "presence_penalty": 0, - "frequency_penalty": 0, - "stop": ["<|eot_id|>"], - "output_audio": true - } -}' -``` - -### Route Requests - -Generic request routing example: -```sh -curl --location '127.0.0.1:39281/v1/route/request' \ ---header 'Content-Type: application/json' \ ---data '{ - "model":"whispervq:fp16", - "path":"/inference", - "engine":"python-engine", - "method":"post", - "transform_response":"{ {%- set first = true -%} {%- for key, value in input_request -%} {%- if key == \"tokens\" -%} {%- if not first -%},{%- endif -%} \"{{ key }}\": {{ tojson(value) }} {%- set first = false -%} {%- endif -%} {%- endfor -%} }", - "body": { - "data": "base64 data", - "format": "wav" - } -}' -``` - -## Adding New Python Models - -### Implementation Requirements - -Python models must expose at least two endpoints: -- `/health`: Server status check -- `/inference`: Model inference - -Example server implementation: - -```python -import argparse -import os -import sys -from pathlib import Path -from contextlib import asynccontextmanager -from typing import AsyncGenerator, List -import uvicorn -from dotenv import load_dotenv -from fastapi import APIRouter, FastAPI -from common.utility.logger_utility import LoggerUtility -from services.audio.audio_controller import 
AudioController -from services.audio.implementation.audio_service import AudioService -from services.health.health_controller import HealthController - -def create_app() -> FastAPI: - routes: List[APIRouter] = [ - HealthController(), - AudioController() - ] - app = FastAPI() - for route in routes: - app.include_router(route) - return app - -def parse_argument(): - parser = argparse.ArgumentParser(description="Ichigo-wrapper Application") - parser.add_argument('--log_path', type=str, default='Ichigo-wrapper.log', help='The log file path') - parser.add_argument('--log_level', type=str, default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'TRACE']) - parser.add_argument('--port', type=int, default=22310) - parser.add_argument('--device_id', type=str, default="0") - parser.add_argument('--package_dir', type=str, default="") - parser.add_argument('--whisper_port', type=int, default=3348) - parser.add_argument('--ichigo_port', type=int, default=39281) - parser.add_argument('--fish_speech_port', type=int, default=22312) - parser.add_argument('--ichigo_model', type=str, default="ichigo:8b-gguf-q4-km") - return parser.parse_args() - -if __name__ == "__main__": - args = parse_argument() - LoggerUtility.init_logger(__name__, args.log_level, args.log_path) - env_path = Path(os.path.dirname(os.path.realpath(__file__))) / "variables" / ".env" - AudioService.initialize(args.whisper_port, args.ichigo_port, args.fish_speech_port, args.ichigo_model) - load_dotenv(dotenv_path=env_path) - app = create_app() - print("Server is running at: 0.0.0.0:", args.port) - uvicorn.run(app=app, host="0.0.0.0", port=args.port) -``` - -### Deployment - -1. Create model files following the example above -2. Add required `requirements.txt` and `requirements.cuda.txt` files -3. Trigger the [Python Script Package CI](https://github.com/menloresearch/cortex.cpp/actions/workflows/python-script-package.yml) -4. 
Trigger the [Python Venv Package CI](https://github.com/menloresearch/cortex.cpp/actions/workflows/python-venv-package.yml) - -The CIs will build and publish your model to Hugging Face where it can then be downloaded and used. diff --git a/docs/sidebars.ts b/docs/sidebars.ts index dde3da69d..cb8a05995 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -148,9 +148,6 @@ const sidebars: SidebarsConfig = { collapsed: true, items: [ { type: "doc", id: "engines/llamacpp", label: "llama.cpp" }, - { type: "doc", id: "engines/python-engine", label: "python engine" }, - // { type: "doc", id: "engines/tensorrt-llm", label: "TensorRT-LLM" }, - // { type: "doc", id: "engines/onnx", label: "ONNX" }, { type: "doc", id: "engines/engine-extension", diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index be8fe200d..55bdf9da2 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -149,7 +149,6 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/python-engine/python_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/dylib_path_manager.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/process/utils.cc diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index 0f0b2b48d..c2a07cbd9 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -85,8 +85,7 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/database_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc - - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc diff --git a/engine/common/base.h b/engine/common/base.h index b5de09059..fcaee860a 100644 --- a/engine/common/base.h +++ b/engine/common/base.h @@ 
-20,9 +20,6 @@ class BaseModel { virtual void GetModels( const HttpRequestPtr& req, std::function&& callback) = 0; - virtual void FineTuning( - const HttpRequestPtr& req, - std::function&& callback) = 0; }; class BaseChatCompletion { diff --git a/engine/common/download_task.h b/engine/common/download_task.h index 53f1902c5..95e736394 100644 --- a/engine/common/download_task.h +++ b/engine/common/download_task.h @@ -6,14 +6,7 @@ #include #include -enum class DownloadType { - Model, - Engine, - Miscellaneous, - CudaToolkit, - Cortex, - Environments -}; +enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex }; struct DownloadItem { @@ -55,8 +48,6 @@ inline std::string DownloadTypeToString(DownloadType type) { return "CudaToolkit"; case DownloadType::Cortex: return "Cortex"; - case DownloadType::Environments: - return "Environments"; default: return "Unknown"; } @@ -73,8 +64,6 @@ inline DownloadType DownloadTypeFromString(const std::string& str) { return DownloadType::CudaToolkit; } else if (str == "Cortex") { return DownloadType::Cortex; - } else if (str == "Environments") { - return DownloadType::Environments; } else { return DownloadType::Miscellaneous; } diff --git a/engine/config/model_config.h b/engine/config/model_config.h index e95a94278..b2d67a103 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -451,341 +451,4 @@ struct ModelConfig { return oss.str(); } }; - -struct Endpoint { - std::string method; - std::string path; - std::string transform_request; - std::string transform_response; -}; - -struct PythonModelConfig { - // General Metadata - std::string id; - std::string model; - std::string name; - int version; - - // Inference Parameters - Endpoint load_model; - Endpoint destroy; - Endpoint inference; - Endpoint heath_check; - std::vector extra_endpoints; - - // Model Load Parameters - std::string port; - std::string script; - std::string log_path; - std::string log_level; - std::string environment; - 
std::vector command; // New command field - std::vector files; - std::vector depends; - std::string engine; - Json::Value extra_params; // Accept dynamic extra parameters - - // Method to convert C++ struct to YAML - void ToYaml(const std::string& filepath) const { - YAML::Emitter out; - out << YAML::BeginMap; - - out << YAML::Key << "id" << YAML::Value << id; - out << YAML::Key << "model" << YAML::Value << model; - out << YAML::Key << "name" << YAML::Value << name; - out << YAML::Key << "version" << YAML::Value << version; - - // Inference Parameters - out << YAML::Key << "load_model" << YAML::Value << YAML::BeginMap; - out << YAML::Key << "method" << YAML::Value << load_model.method; - out << YAML::Key << "path" << YAML::Value << load_model.path; - out << YAML::Key << "transform_request" << YAML::Value - << load_model.transform_request; - out << YAML::Key << "transform_response" << YAML::Value - << load_model.transform_response; - out << YAML::EndMap; - - out << YAML::Key << "destroy" << YAML::Value << YAML::BeginMap; - out << YAML::Key << "method" << YAML::Value << destroy.method; - out << YAML::Key << "path" << YAML::Value << destroy.path; - out << YAML::EndMap; - - out << YAML::Key << "inference" << YAML::Value << YAML::BeginMap; - out << YAML::Key << "method" << YAML::Value << inference.method; - out << YAML::Key << "path" << YAML::Value << inference.path; - out << YAML::EndMap; - - out << YAML::Key << "extra_endpoints" << YAML::Value << YAML::BeginSeq; - for (const auto& endpoint : extra_endpoints) { - out << YAML::BeginMap; - out << YAML::Key << "method" << YAML::Value << endpoint.method; - out << YAML::Key << "path" << YAML::Value << endpoint.path; - out << YAML::EndMap; - } - out << YAML::EndSeq; - - // Model Load Parameters - out << YAML::Key << "port" << YAML::Value << port; - out << YAML::Key << "script" << YAML::Value << script; - out << YAML::Key << "log_path" << YAML::Value << log_path; - out << YAML::Key << "log_level" << YAML::Value << log_level; 
- out << YAML::Key << "environment" << YAML::Value << environment; - - // Serialize command as YAML list - out << YAML::Key << "command" << YAML::Value << YAML::BeginSeq; - for (const auto& cmd : command) { - out << cmd; - } - out << YAML::EndSeq; - - // Serialize files as YAML list - out << YAML::Key << "files" << YAML::Value << YAML::BeginSeq; - for (const auto& file : files) { - out << file; - } - out << YAML::EndSeq; - - // Serialize command as YAML list - out << YAML::Key << "depends" << YAML::Value << YAML::BeginSeq; - for (const auto& depend : depends) { - out << depend; - } - out << YAML::EndSeq; - - out << YAML::Key << "engine" << YAML::Value << engine; - - // Serialize extra_params as YAML - out << YAML::Key << "extra_params" << YAML::Value << YAML::BeginMap; - for (Json::ValueConstIterator iter = extra_params.begin(); - iter != extra_params.end(); ++iter) { - out << YAML::Key << iter.key().asString() << YAML::Value - << iter->asString(); - } - out << YAML::EndMap; - - std::ofstream fout(filepath); - if (!fout.is_open()) { - throw std::runtime_error("Failed to open file for writing: " + filepath); - } - fout << out.c_str(); - } - - // Method to populate struct from YAML file - void ReadFromYaml(const std::string& filePath) { - YAML::Node config = YAML::LoadFile(filePath); - - if (config["id"]) - id = config["id"].as(); - if (config["model"]) - model = config["model"].as(); - if (config["name"]) - name = config["name"].as(); - if (config["version"]) - version = config["version"].as(); - - // Inference Parameters - - auto ip = config; - if (ip["load_model"]) { - load_model.method = ip["load_model"]["method"].as(); - load_model.path = ip["load_model"]["path"].as(); - load_model.transform_request = - ip["load_model"]["transform_request"].as(); - load_model.transform_response = - ip["load_model"]["transform_response"].as(); - } - if (ip["destroy"]) { - destroy.method = ip["destroy"]["method"].as(); - destroy.path = ip["destroy"]["path"].as(); - } - if 
(ip["inference"]) { - inference.method = ip["inference"]["method"].as(); - inference.path = ip["inference"]["path"].as(); - } - if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) { - for (const auto& endpoint : ip["extra_endpoints"]) { - Endpoint e; - e.method = endpoint["method"].as(); - e.path = endpoint["path"].as(); - extra_endpoints.push_back(e); - } - } - - // Model Load Parameters - - auto mlp = config; - if (mlp["port"]) - port = mlp["port"].as(); - if (mlp["script"]) - script = mlp["script"].as(); - if (mlp["log_path"]) - log_path = mlp["log_path"].as(); - if (mlp["log_level"]) - log_level = mlp["log_level"].as(); - if (mlp["environment"]) - environment = mlp["environment"].as(); - if (mlp["engine"]) - engine = mlp["engine"].as(); - - if (mlp["command"] && mlp["command"].IsSequence()) { - for (const auto& cmd : mlp["command"]) { - command.push_back(cmd.as()); - } - } - - if (mlp["files"] && mlp["files"].IsSequence()) { - for (const auto& file : mlp["files"]) { - files.push_back(file.as()); - } - } - - if (mlp["depends"] && mlp["depends"].IsSequence()) { - for (const auto& depend : mlp["depends"]) { - depends.push_back(depend.as()); - } - } - - if (mlp["extra_params"]) { - for (YAML::const_iterator it = mlp["extra_params"].begin(); - it != mlp["extra_params"].end(); ++it) { - extra_params[it->first.as()] = - it->second.as(); - } - } - } - - // Method to convert the struct to JSON - Json::Value ToJson() const { - Json::Value root; - - root["id"] = id; - root["model"] = model; - root["name"] = name; - root["version"] = version; - - // Inference Parameters - root["load_model"]["method"] = load_model.method; - root["load_model"]["path"] = load_model.path; - root["load_model"]["transform_request"] = load_model.transform_request; - root["load_model"]["transform_response"] = load_model.transform_response; - - root["destroy"]["method"] = destroy.method; - root["destroy"]["path"] = destroy.path; - - root["inference"]["method"] = inference.method; - 
root["inference"]["path"] = inference.path; - - for (const auto& endpoint : extra_endpoints) { - Json::Value e; - e["method"] = endpoint.method; - e["path"] = endpoint.path; - root["extra_endpoints"].append(e); - } - - // Model Load Parameters - root["port"] = port; - root["log_path"] = log_path; - root["log_level"] = log_level; - root["environment"] = environment; - root["script"] = script; - - // Serialize command as JSON array - for (const auto& cmd : command) { - root["command"].append(cmd); - } - - for (const auto& file : files) { - root["files"].append(file); - } - - for (const auto& depend : depends) { - root["depends"].append(depend); - } - - root["engine"] = engine; - root["extra_params"] = extra_params; // Serialize the JSON value directly - - return root; - } - - // Method to populate struct from JSON - void FromJson(const Json::Value& root) { - - if (root.isMember("id")) - id = root["id"].asString(); - if (root.isMember("model")) - model = root["model"].asString(); - if (root.isMember("name")) - name = root["name"].asString(); - if (root.isMember("version")) - version = root["version"].asInt(); - - // Inference Parameters - - const Json::Value& ip = root; - if (ip.isMember("load_model")) { - load_model.method = ip["load_model"]["method"].asString(); - load_model.path = ip["load_model"]["path"].asString(); - load_model.transform_request = - ip["load_model"]["transform_request"].asString(); - load_model.transform_response = - ip["load_model"]["transform_response"].asString(); - } - if (ip.isMember("destroy")) { - destroy.method = ip["destroy"]["method"].asString(); - destroy.path = ip["destroy"]["path"].asString(); - } - if (ip.isMember("inference")) { - inference.method = ip["inference"]["method"].asString(); - inference.path = ip["inference"]["path"].asString(); - } - if (ip.isMember("extra_endpoints")) { - for (const auto& endpoint : ip["extra_endpoints"]) { - Endpoint e; - e.method = endpoint["method"].asString(); - e.path = 
endpoint["path"].asString(); - extra_endpoints.push_back(e); - } - } - - // Model Load Parameters - - const Json::Value& mlp = root; - if (mlp.isMember("port")) - port = mlp["port"].asString(); - if (mlp.isMember("log_path")) - log_path = mlp["log_path"].asString(); - if (mlp.isMember("log_level")) - log_level = mlp["log_level"].asString(); - if (mlp.isMember("environment")) - environment = mlp["environment"].asString(); - if (mlp.isMember("engine")) - engine = mlp["engine"].asString(); - if (mlp.isMember("script")) - script = mlp["script"].asString(); - - if (mlp.isMember("command")) { - for (const auto& cmd : mlp["command"]) { - command.push_back(cmd.asString()); - } - } - - if (mlp.isMember("files")) { - for (const auto& file : mlp["files"]) { - files.push_back(file.asString()); - } - } - - if (mlp.isMember("depends")) { - for (const auto& depend : mlp["depends"]) { - depends.push_back(depend.asString()); - } - } - - if (mlp.isMember("extra_params")) { - extra_params = mlp["extra_params"]; // Directly assign the JSON value - } - } -}; - } // namespace config diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index d88efc254..3215da753 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -224,16 +224,6 @@ void Models::ListModel( } data.append(std::move(obj)); yaml_handler.Reset(); - } else if (model_config.engine == kPythonEngine) { - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml( - fmu::ToAbsoluteCortexDataPath( - fs::path(model_entry.path_to_model_yaml)) - .string()); - Json::Value obj = python_model_config.ToJson(); - obj["id"] = model_entry.model; - obj["model"] = model_entry.model; - data.append(std::move(obj)); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( @@ -302,19 +292,6 @@ void Models::GetModel(const HttpRequestPtr& req, auto resp = cortex_utils::CreateCortexHttpTextAsJsonResponse(ret); resp->setStatusCode(drogon::k200OK); 
callback(resp); - } else if (model_config.engine == kPythonEngine) { - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml( - fmu::ToAbsoluteCortexDataPath( - fs::path(model_entry.value().path_to_model_yaml)) - .string()); - ret = python_model_config.ToJson(); - ret["id"] = python_model_config.model; - ret["object"] = "model"; - ret["result"] = "OK"; - auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); - resp->setStatusCode(k200OK); - callback(resp); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile( @@ -383,17 +360,6 @@ void Models::UpdateModel(const HttpRequestPtr& req, yaml_handler.WriteYamlFile(yaml_fp.string()); message = "Successfully update model ID '" + model_id + "': " + json_body.toStyledString(); - } else if (model_config.engine == kPythonEngine) { - // Block changes to `command` - if (json_body.isMember("command")) { - json_body.removeMember("command"); - } - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml(yaml_fp.string()); - python_model_config.FromJson(json_body); - python_model_config.ToYaml(yaml_fp.string()); - message = "Successfully update model ID '" + model_id + - "': " + json_body.toStyledString(); } else { config::RemoteModelConfig remote_model_config; remote_model_config.LoadFromYamlFile(yaml_fp.string()); diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index a8cff2166..079b69423 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -121,95 +121,6 @@ void server::GetModels(const HttpRequestPtr& req, LOG_TRACE << "Done get models"; } -void server::FineTuning( - const HttpRequestPtr& req, - std::function&& callback) { - auto ir = inference_svc_->FineTuning(req->getJsonObject()); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(ir)); - resp->setStatusCode( - static_cast(std::get<0>(ir)["status_code"].asInt())); - callback(resp); - LOG_TRACE << "Done 
fine-tuning"; -} - -void server::Inference(const HttpRequestPtr& req, - std::function&& callback) { - - auto json_body = req->getJsonObject(); - - LOG_TRACE << "Start inference"; - auto q = std::make_shared(); - auto ir = inference_svc_->HandleInference(q, req->getJsonObject()); - LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); - if (ir.has_error()) { - auto err = ir.error(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); - resp->setStatusCode( - static_cast(std::get<0>(err)["status_code"].asInt())); - callback(resp); - return; - } - - bool is_stream = - (*json_body).get("stream", false).asBool() || - (*json_body).get("body", Json::Value()).get("stream", false).asBool(); - - LOG_TRACE << "Wait to inference"; - if (is_stream) { - auto model_id = (*json_body).get("model", "invalid_model").asString(); - auto engine_type = [this, &json_body]() -> std::string { - if (!inference_svc_->HasFieldInReq(json_body, "engine")) { - return kLlamaRepo; - } else { - return (*(json_body)).get("engine", kLlamaRepo).asString(); - } - }(); - ProcessStreamRes(callback, q, engine_type, model_id); - } else { - ProcessNonStreamRes(callback, *q); - LOG_TRACE << "Done inference"; - } -} - -void server::RouteRequest( - const HttpRequestPtr& req, - std::function&& callback) { - - auto json_body = req->getJsonObject(); - - LOG_TRACE << "Start route request"; - auto q = std::make_shared(); - auto ir = inference_svc_->HandleRouteRequest(q, req->getJsonObject()); - LOG_DEBUG << "request: " << req->getJsonObject()->toStyledString(); - if (ir.has_error()) { - auto err = ir.error(); - auto resp = cortex_utils::CreateCortexHttpJsonResponse(std::get<1>(err)); - resp->setStatusCode( - static_cast(std::get<0>(err)["status_code"].asInt())); - callback(resp); - return; - } - auto is_stream = - (*json_body).get("stream", false).asBool() || - (*json_body).get("body", Json::Value()).get("stream", false).asBool(); - LOG_TRACE << "Wait to route request"; - if 
(is_stream) { - - auto model_id = (*json_body).get("model", "invalid_model").asString(); - auto engine_type = [this, &json_body]() -> std::string { - if (!inference_svc_->HasFieldInReq(json_body, "engine")) { - return kLlamaRepo; - } else { - return (*(json_body)).get("engine", kLlamaRepo).asString(); - } - }(); - ProcessStreamRes(callback, q, engine_type, model_id); - } else { - ProcessNonStreamRes(callback, *q); - LOG_TRACE << "Done route request"; - } -} - void server::LoadModel(const HttpRequestPtr& req, std::function&& callback) { auto ir = inference_svc_->LoadModel(req->getJsonObject()); diff --git a/engine/controllers/server.h b/engine/controllers/server.h index 42214a641..7c8d759b4 100644 --- a/engine/controllers/server.h +++ b/engine/controllers/server.h @@ -39,15 +39,9 @@ class server : public drogon::HttpController, METHOD_ADD(server::ModelStatus, "modelstatus", Options, Post); METHOD_ADD(server::GetModels, "models", Get); - // cortex.python API - METHOD_ADD(server::FineTuning, "finetuning", Options, Post); - // Openai compatible path ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Options, Post); - ADD_METHOD_TO(server::FineTuning, "/v1/fine_tuning/job", Options, Post); ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Options, Post); - ADD_METHOD_TO(server::Inference, "/v1/inference", Options, Post); - ADD_METHOD_TO(server::RouteRequest, "/v1/route/request", Options, Post); METHOD_LIST_END @@ -69,13 +63,6 @@ class server : public drogon::HttpController, void GetModels( const HttpRequestPtr& req, std::function&& callback) override; - void FineTuning( - const HttpRequestPtr& req, - std::function&& callback) override; - void Inference(const HttpRequestPtr& req, - std::function&& callback); - void RouteRequest(const HttpRequestPtr& req, - std::function&& callback); private: void ProcessStreamRes(std::function cb, diff --git a/engine/cortex-common/EngineI.h b/engine/cortex-common/EngineI.h index 754f16593..b796ebaed 100644 --- 
a/engine/cortex-common/EngineI.h +++ b/engine/cortex-common/EngineI.h @@ -61,12 +61,4 @@ class EngineI { // Stop inflight chat completion in stream mode virtual void StopInferencing(const std::string& model_id) = 0; - - virtual Json::Value GetRemoteModels() = 0; - virtual void HandleRouteRequest( - std::shared_ptr json_body, - std::function&& callback) = 0; - virtual void HandleInference( - std::shared_ptr json_body, - std::function&& callback) = 0; }; diff --git a/engine/cortex-common/cortexpythoni.h b/engine/cortex-common/cortexpythoni.h deleted file mode 100644 index 06a79838f..000000000 --- a/engine/cortex-common/cortexpythoni.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -#include "json/value.h" - -class CortexPythonEngineI { - public: - virtual ~CortexPythonEngineI() {} - - virtual bool IsSupported(const std::string& f) = 0; - - virtual void ExecutePythonFile(std::string binary_execute_path, - std::string file_execution_path, - std::string python_library_path) = 0; - - virtual void HandlePythonFileExecutionRequest( - std::shared_ptr json_body, - std::function&& callback) = 0; -}; - diff --git a/engine/extensions/python-engine/python_engine.cc b/engine/extensions/python-engine/python_engine.cc deleted file mode 100644 index 685301b47..000000000 --- a/engine/extensions/python-engine/python_engine.cc +++ /dev/null @@ -1,899 +0,0 @@ -#include "python_engine.h" -#include -#include -#include -#include - -namespace python_engine { -namespace { -constexpr const int k200OK = 200; -constexpr const int k400BadRequest = 400; -constexpr const int k409Conflict = 409; -constexpr const int k500InternalServerError = 500; -constexpr const int kFileLoggerOption = 0; - -size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, - void* userdata) { - auto* context = static_cast(userdata); - std::string chunk(ptr, size * nmemb); - - context->buffer += chunk; - - // Process complete lines - size_t pos; - while ((pos = context->buffer.find('\n')) != 
std::string::npos) { - std::string line = context->buffer.substr(0, pos); - context->buffer = context->buffer.substr(pos + 1); - LOG_DEBUG << "line: " << line; - - // Skip empty lines - if (line.empty() || line == "\r") - continue; - - if (line == "data: [DONE]") { - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = true; - status["status_code"] = 200; - (*context->callback)(std::move(status), Json::Value()); - break; - } - - // Parse the JSON - Json::Value chunk_json; - chunk_json["data"] = line + "\n\n"; - Json::Reader reader; - - Json::Value status; - status["is_done"] = false; - status["has_error"] = false; - status["is_stream"] = true; - status["status_code"] = 200; - (*context->callback)(std::move(status), std::move(chunk_json)); - } - - return size * nmemb; -} - -static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, - std::string* data) { - data->append(ptr, size * nmemb); - return size * nmemb; -} - -} // namespace - -PythonEngine::PythonEngine() : q_(4 /*n_parallel*/, "python_engine") {} - -PythonEngine::~PythonEngine() { - curl_global_cleanup(); -} - -config::PythonModelConfig* PythonEngine::GetModelConfig( - const std::string& model) { - std::shared_lock lock(models_mutex_); - auto it = models_.find(model); - if (it != models_.end()) { - return &it->second; - } - return nullptr; -} - -bool PythonEngine::TerminateModelProcess(const std::string& model) { - auto it = process_map_.find(model); - if (it == process_map_.end()) { - LOG_ERROR << "No process found for model: " << model - << ", removing from list running models."; - models_.erase(model); - return false; - } - - bool success = cortex::process::KillProcess(it->second); - if (success) { - process_map_.erase(it); - } - return success; -} - -CurlResponse PythonEngine::MakeGetRequest(const std::string& model, - const std::string& path) { - auto const& config = models_[model]; - std::string full_url = "http://localhost:" + config.port + 
path; - CurlResponse response; - - auto result = curl_utils::SimpleRequest(full_url, RequestType::GET); - if (result.has_error()) { - response.error = true; - response.error_message = result.error(); - } else { - response.body = result.value(); - } - return response; -} - -CurlResponse PythonEngine::MakeDeleteRequest(const std::string& model, - const std::string& path) { - auto const& config = models_[model]; - std::string full_url = "http://localhost:" + config.port + path; - CurlResponse response; - - auto result = curl_utils::SimpleRequest(full_url, RequestType::DEL); - - if (result.has_error()) { - response.error = true; - response.error_message = result.error(); - } else { - response.body = result.value(); - } - - return response; -} - -CurlResponse PythonEngine::MakePostRequest(const std::string& model, - const std::string& path, - const std::string& body) { - auto const& config = models_[model]; - std::string full_url = "http://localhost:" + config.port + path; - - CurlResponse response; - auto result = curl_utils::SimpleRequest(full_url, RequestType::POST, body); - - if (result.has_error()) { - response.error = true; - response.error_message = result.error(); - } else { - response.body = result.value(); - } - return response; -} - -bool PythonEngine::LoadModelConfig(const std::string& model, - const std::string& yaml_path) { - try { - config::PythonModelConfig config; - config.ReadFromYaml(yaml_path); - std::unique_lock lock(models_mutex_); - models_[model] = config; - } catch (const std::exception& e) { - LOG_ERROR << "Failed to load model config: " << e.what(); - return false; - } - - return true; -} - -void PythonEngine::GetModels( - std::shared_ptr json_body, - std::function&& callback) { - - Json::Value response_json; - Json::Value model_array(Json::arrayValue); - - for (const auto& pair : models_) { - auto val = pair.second.ToJson(); - model_array.append(val); - } - - response_json["object"] = "list"; - response_json["data"] = model_array; - - 
Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - - callback(std::move(status), std::move(response_json)); -} - -void PythonEngine::LoadModel( - std::shared_ptr json_body, - std::function&& callback) { - // TODO: handle a case that can spawn process but the process spawn fail. - pid_t pid; - if (!json_body->isMember("model") || !json_body->isMember("model_path")) { - Json::Value error; - error["error"] = "Missing required fields: model or model_path"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - const std::string& model = (*json_body)["model"].asString(); - const std::string& model_path = (*json_body)["model_path"].asString(); - if (models_.find(model) != models_.end()) { - Json::Value error; - error["error"] = "Model already loaded!"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k409Conflict; - callback(std::move(status), std::move(error)); - return; - } - - if (!LoadModelConfig(model, model_path)) { - Json::Value error; - error["error"] = "Failed to load model configuration"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - callback(std::move(status), std::move(error)); - return; - } - auto model_config = models_[model]; - auto model_folder_path = model_config.files[0]; - auto data_folder_path = - std::filesystem::path(model_folder_path) / std::filesystem::path("venv"); - try { -#if defined(_WIN32) - auto executable = std::filesystem::path(data_folder_path) / - std::filesystem::path("Scripts"); -#else - auto executable = - std::filesystem::path(data_folder_path) / std::filesystem::path("bin"); 
-#endif - - auto executable_str = - (executable / std::filesystem::path(model_config.command[0])).string(); - auto command = model_config.command; - command[0] = executable_str; - command.push_back((std::filesystem::path(model_folder_path) / - std::filesystem::path(model_config.script)) - .string()); - std::list args{"--port", - model_config.port, - "--log_path", - (file_manager_utils::GetCortexLogPath() / - std::filesystem::path(model_config.log_path)) - .string(), - "--log_level", - model_config.log_level}; - if (!model_config.extra_params.isNull() && - model_config.extra_params.isObject()) { - for (const auto& key : model_config.extra_params.getMemberNames()) { - const Json::Value& value = model_config.extra_params[key]; - - // Convert key to string with -- prefix - std::string param_key = "--" + key; - - // Handle different JSON value types - if (value.isString()) { - args.emplace_back(param_key); - args.emplace_back(value.asString()); - } else if (value.isInt()) { - args.emplace_back(param_key); - args.emplace_back(std::to_string(value.asInt())); - } else if (value.isDouble()) { - args.emplace_back(param_key); - args.emplace_back(std::to_string(value.asDouble())); - } else if (value.isBool()) { - // For boolean, only add the flag if true - if (value.asBool()) { - args.emplace_back(param_key); - } - } - } - } - - // Add the parsed arguments to the command - command.insert(command.end(), args.begin(), args.end()); - pid = cortex::process::SpawnProcess(command); - process_map_[model] = pid; - if (pid == -1) { - std::unique_lock lock(models_mutex_); - if (models_.find(model) != models_.end()) { - models_.erase(model); - } - - Json::Value error; - error["error"] = "Fail to spawn process with pid -1"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - callback(std::move(status), std::move(error)); - return; - } - } catch (const std::exception& e) { - 
std::unique_lock lock(models_mutex_); - if (models_.find(model) != models_.end()) { - models_.erase(model); - } - - Json::Value error; - error["error"] = e.what(); - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - callback(std::move(status), std::move(error)); - return; - } - - Json::Value response; - response["status"] = - "Model loaded successfully with pid: " + std::to_string(pid); - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - callback(std::move(status), std::move(response)); -} - -void PythonEngine::UnloadModel( - std::shared_ptr json_body, - std::function&& callback) { - if (!json_body->isMember("model")) { - Json::Value error; - error["error"] = "Missing required field: model"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - auto model = (*json_body)["model"].asString(); - - { - if (TerminateModelProcess(model)) { - std::unique_lock lock(models_mutex_); - models_.erase(model); - } else { - Json::Value error; - error["error"] = "Fail to terminate process with id: " + - std::to_string(process_map_[model]); - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - } - - Json::Value response; - response["status"] = "Model unloaded successfully"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - callback(std::move(status), std::move(response)); -} - -void PythonEngine::HandleChatCompletion( - std::shared_ptr json_body, - 
std::function&& callback) { - LOG_WARN << "Does not support yet!"; -} - -CurlResponse PythonEngine::MakeStreamPostRequest( - const std::string& model, const std::string& path, const std::string& body, - const std::function& callback) { - auto const& config = models_[model]; - CURL* curl = curl_easy_init(); - CurlResponse response; - - if (!curl) { - response.error = true; - response.error_message = "Failed to initialize CURL"; - return response; - } - - std::string full_url = "http://localhost:" + config.port + path; - - struct curl_slist* headers = nullptr; - headers = curl_slist_append(headers, "Content-Type: application/json"); - headers = curl_slist_append(headers, "Accept: text/event-stream"); - headers = curl_slist_append(headers, "Cache-Control: no-cache"); - headers = curl_slist_append(headers, "Connection: keep-alive"); - - StreamContext context{ - std::make_shared>( - callback), - ""}; - - curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); - curl_easy_setopt(curl, CURLOPT_POST, 1L); - curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, StreamWriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &context); - curl_easy_setopt(curl, CURLOPT_TRANSFER_ENCODING, 1L); - - CURLcode res = curl_easy_perform(curl); - - if (res != CURLE_OK) { - response.error = true; - response.error_message = curl_easy_strerror(res); - - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = true; - status["status_code"] = 500; - - Json::Value error; - error["error"] = response.error_message; - callback(std::move(status), std::move(error)); - } - - curl_slist_free_all(headers); - curl_easy_cleanup(curl); - return response; -} - -void PythonEngine::HandleInference( - std::shared_ptr json_body, - std::function&& callback) { - if (json_body && !json_body->isMember("model")) { - Json::Value error; - error["error"] = 
"Missing required field: model is required!"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - std::string method = "post"; - std::string path = "/inference"; - auto transform_request = (*json_body).get("transform_request", "").asString(); - auto transform_response = - (*json_body).get("transform_response", "").asString(); - auto model = (*json_body)["model"].asString(); - auto& body = (*json_body)["body"]; - - if (models_.find(model) == models_.end()) { - Json::Value error; - error["error"] = "Model '" + model + "' is not loaded!"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - // Transform Request - std::string transformed_request; - if (!transform_request.empty()) { - - try { - // Validate JSON body - if (!body || body.isNull()) { - throw std::runtime_error("Invalid or null JSON body"); - } - - // Render with error handling - try { - transformed_request = renderer_.Render(transform_request, body); - - } catch (const std::exception& e) { - throw std::runtime_error("Template rendering error: " + - std::string(e.what())); - } - } catch (const std::exception& e) { - // Log error and potentially rethrow or handle accordingly - LOG_WARN << "Error in TransformRequest: " << e.what(); - LOG_WARN << "Using original request body"; - transformed_request = body.toStyledString(); - } - } else { - transformed_request = body.toStyledString(); - } - - // End Transform request - - CurlResponse response; - if (method == "post") { - if (body.isMember("stream") && body["stream"].asBool()) { - q_.runTaskInQueue( - [this, model, path, transformed_request, cb = std::move(callback)] { - MakeStreamPostRequest(model, path, transformed_request, cb); - 
}); - - return; - } else { - response = MakePostRequest(model, path, transformed_request); - } - - } else if (method == "get") { - response = MakeGetRequest(model, path); - } else if (method == "delete") { - response = MakeDeleteRequest(model, path); - } else { - Json::Value error; - error["error"] = - "method not supported! Supported methods are: post, get, delete"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - if (response.error) { - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - Json::Value error; - error["error"] = response.error_message; - callback(std::move(status), std::move(error)); - return; - } - - Json::Value response_json; - Json::Reader reader; - if (!reader.parse(response.body, response_json)) { - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - Json::Value error; - error["error"] = "Failed to parse response"; - callback(std::move(status), std::move(error)); - return; - } - - if (!transform_response.empty()) { - // Transform Response - std::string response_str; - try { - // Validate JSON body - if (!response_json || response_json.isNull()) { - throw std::runtime_error("Invalid or null JSON body"); - } - // Render with error handling - try { - response_str = renderer_.Render(transform_response, response_json); - } catch (const std::exception& e) { - throw std::runtime_error("Template rendering error: " + - std::string(e.what())); - } - } catch (const std::exception& e) { - // Log error and potentially rethrow or handle accordingly - LOG_WARN << "Error in TransformRequest: " << e.what(); - LOG_WARN << "Using original request body"; - response_str = 
response_json.toStyledString(); - } - - Json::Reader reader_final; - Json::Value response_json_final; - if (!reader_final.parse(response_str, response_json_final)) { - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - Json::Value error; - error["error"] = "Failed to parse response"; - callback(std::move(status), std::move(error)); - return; - } - - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - - callback(std::move(status), std::move(response_json_final)); - } else { - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - - callback(std::move(status), std::move(response_json)); - } -} - -Json::Value PythonEngine::GetRemoteModels() { - return Json::Value(); -} - -void PythonEngine::StopInferencing(const std::string& model_id) {} - -void PythonEngine::HandleRouteRequest( - std::shared_ptr json_body, - std::function&& callback) { - if (!json_body->isMember("model") || !json_body->isMember("method") || - !json_body->isMember("path")) { - Json::Value error; - error["error"] = - "Missing required field: model, method and path are required!"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - auto method = (*json_body)["method"].asString(); - auto path = (*json_body)["path"].asString(); - auto transform_request = (*json_body).get("transform_request", "").asString(); - auto transform_response = - (*json_body).get("transform_response", "").asString(); - auto model = (*json_body)["model"].asString(); - auto& body = (*json_body)["body"]; - - if (models_.find(model) == models_.end()) { - Json::Value error; - 
error["error"] = "Model '" + model + "' is not loaded!"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - // Transform Request - std::string transformed_request; - if (!transform_request.empty()) { - - try { - // Validate JSON body - if (!body || body.isNull()) { - throw std::runtime_error("Invalid or null JSON body"); - } - - // Render with error handling - try { - transformed_request = renderer_.Render(transform_request, *json_body); - } catch (const std::exception& e) { - throw std::runtime_error("Template rendering error: " + - std::string(e.what())); - } - } catch (const std::exception& e) { - // Log error and potentially rethrow or handle accordingly - LOG_WARN << "Error in TransformRequest: " << e.what(); - LOG_WARN << "Using original request body"; - transformed_request = body.toStyledString(); - } - } else { - transformed_request = body.toStyledString(); - } - - // End Transform request - - CurlResponse response; - if (method == "post") { - response = MakePostRequest(model, path, transformed_request); - } else if (method == "get") { - response = MakeGetRequest(model, path); - } else if (method == "delete") { - response = MakeDeleteRequest(model, path); - } else { - Json::Value error; - error["error"] = - "method not supported! 
Supported methods are: post, get, delete"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - if (response.error) { - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - Json::Value error; - error["error"] = response.error_message; - callback(std::move(status), std::move(error)); - return; - } - - Json::Value response_json; - Json::Reader reader; - if (!reader.parse(response.body, response_json)) { - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - Json::Value error; - error["error"] = "Failed to parse response"; - callback(std::move(status), std::move(error)); - return; - } - - if (!transform_response.empty()) { - // Transform Response - std::string response_str; - try { - // Validate JSON body - if (!response_json || response_json.isNull()) { - throw std::runtime_error("Invalid or null JSON body"); - } - // Render with error handling - try { - response_str = renderer_.Render(transform_response, response_json); - } catch (const std::exception& e) { - throw std::runtime_error("Template rendering error: " + - std::string(e.what())); - } - } catch (const std::exception& e) { - // Log error and potentially rethrow or handle accordingly - LOG_WARN << "Error in TransformRequest: " << e.what(); - LOG_WARN << "Using original request body"; - response_str = response_json.toStyledString(); - } - - Json::Reader reader_final; - Json::Value response_json_final; - if (!reader_final.parse(response_str, response_json_final)) { - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k500InternalServerError; - Json::Value 
error; - error["error"] = "Failed to parse response"; - callback(std::move(status), std::move(error)); - return; - } - - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - - callback(std::move(status), std::move(response_json_final)); - } else { - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - - callback(std::move(status), std::move(response_json)); - } -} - -void PythonEngine::GetModelStatus( - std::shared_ptr json_body, - std::function&& callback) { - if (!json_body->isMember("model")) { - Json::Value error; - error["error"] = "Missing required field: model"; - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - callback(std::move(status), std::move(error)); - return; - } - - auto model = json_body->get("model", "").asString(); - auto model_config = models_[model]; - auto health_endpoint = model_config.heath_check; - auto pid = process_map_[model]; - auto is_process_live = cortex::process::IsProcessAlive(pid); - auto response_health = MakeGetRequest(model, health_endpoint.path); - - if (response_health.error && is_process_live) { - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - Json::Value message; - message["message"] = "model '"+model+"' is loading"; - callback(std::move(status), std::move(message)); - return; - } - else if(response_health.error && !is_process_live){ - Json::Value status; - status["is_done"] = true; - status["has_error"] = true; - status["is_stream"] = false; - status["status_code"] = k400BadRequest; - Json::Value message; - message["message"] = response_health.error_message; - callback(std::move(status), std::move(message)); - return; - } - - Json::Value 
response; - response["model"] = model; - response["model_loaded"] = true; - response["model_data"] = model_config.ToJson(); - - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = k200OK; - callback(std::move(status), std::move(response)); -} - -// Implement remaining virtual functions -void PythonEngine::HandleEmbedding( - std::shared_ptr, - std::function&& callback) { - callback(Json::Value(), Json::Value()); -} - -bool PythonEngine::IsSupported(const std::string& f) { - if (f == "HandleChatCompletion" || f == "LoadModel" || f == "UnloadModel" || - f == "GetModelStatus" || f == "GetModels" || f == "SetFileLogger" || - f == "SetLogLevel") { - return true; - } - return false; -} - -bool PythonEngine::SetFileLogger(int max_log_lines, - const std::string& log_path) { - if (!async_file_logger_) { - async_file_logger_ = std::make_unique(); - } - - async_file_logger_->setFileName(log_path); - async_file_logger_->setMaxLines(max_log_lines); // Keep last 100000 lines - async_file_logger_->startLogging(); - trantor::Logger::setOutputFunction( - [&](const char* msg, const uint64_t len) { - if (async_file_logger_) - async_file_logger_->output_(msg, len); - }, - [&]() { - if (async_file_logger_) - async_file_logger_->flush(); - }); - freopen(log_path.c_str(), "w", stderr); - freopen(log_path.c_str(), "w", stdout); - return true; -} - -void PythonEngine::SetLogLevel(trantor::Logger::LogLevel log_level) { - trantor::Logger::setLogLevel(log_level); -} - -void PythonEngine::Load(EngineLoadOption opts) { - // Develop register model here on loading engine -}; - -void PythonEngine::Unload(EngineUnloadOption opts) { - for (const auto& pair : models_) { - TerminateModelProcess(pair.first); - } -}; - -} // namespace python_engine \ No newline at end of file diff --git a/engine/extensions/python-engine/python_engine.h b/engine/extensions/python-engine/python_engine.h deleted file mode 100644 index 
842ce8259..000000000 --- a/engine/extensions/python-engine/python_engine.h +++ /dev/null @@ -1,110 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include "config/model_config.h" -#include "trantor/utils/ConcurrentTaskQueue.h" - -#include "cortex-common/EngineI.h" -#include "extensions/template_renderer.h" -#include "utils/file_logger.h" -#include "utils/file_manager_utils.h" -#include "utils/curl_utils.h" -#include "utils/process/utils.h" - -// Helper for CURL response -namespace python_engine { -struct StreamContext { - std::shared_ptr> callback; - std::string buffer; -}; - -struct CurlResponse { - std::string body; - bool error{false}; - std::string error_message; -}; - -class PythonEngine : public EngineI { - private: - // Model configuration - - // Thread-safe model config storage - mutable std::shared_mutex models_mutex_; - std::unordered_map models_; - extensions::TemplateRenderer renderer_; - std::unique_ptr async_file_logger_; - std::unordered_map process_map_; - trantor::ConcurrentTaskQueue q_; - - // Helper functions - CurlResponse MakePostRequest(const std::string& model, - const std::string& path, - const std::string& body); - CurlResponse MakeGetRequest(const std::string& model, - const std::string& path); - CurlResponse MakeDeleteRequest(const std::string& model, - const std::string& path); - CurlResponse MakeStreamPostRequest( - const std::string& model, const std::string& path, - const std::string& body, - const std::function& callback); - - // Process manager functions - bool TerminateModelProcess(const std::string& model); - - // Internal model management - bool LoadModelConfig(const std::string& model, const std::string& yaml_path); - config::PythonModelConfig* GetModelConfig(const std::string& model); - - public: - PythonEngine(); - ~PythonEngine(); - - void Load(EngineLoadOption opts) override; - - void Unload(EngineUnloadOption opts) override; - - // Main interface implementations - void 
GetModels( - std::shared_ptr json_body, - std::function&& callback) override; - - void HandleChatCompletion( - std::shared_ptr json_body, - std::function&& callback) override; - - void LoadModel( - std::shared_ptr json_body, - std::function&& callback) override; - - void UnloadModel( - std::shared_ptr json_body, - std::function&& callback) override; - - void GetModelStatus( - std::shared_ptr json_body, - std::function&& callback) override; - - // Other required virtual functions - void HandleEmbedding( - std::shared_ptr json_body, - std::function&& callback) override; - bool IsSupported(const std::string& feature) override; - bool SetFileLogger(int max_log_lines, const std::string& log_path) override; - void SetLogLevel(trantor::Logger::LogLevel logLevel) override; - void HandleRouteRequest( - std::shared_ptr json_body, - std::function&& callback) override; - void HandleInference( - std::shared_ptr json_body, - std::function&& callback) override; - Json::Value GetRemoteModels() override; - void StopInferencing(const std::string& model_id) override; -}; -} // namespace python_engine \ No newline at end of file diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 8fd070bea..401edae12 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -9,7 +9,6 @@ #include "config/model_config.h" #include "database/engines.h" #include "database/models.h" -#include "extensions/python-engine/python_engine.h" #include "extensions/remote-engine/remote_engine.h" #include "utils/archive_utils.h" @@ -674,14 +673,6 @@ cpp::result EngineService::LoadEngine( return {}; } - // Check for python engine - - if (engine_name == kPythonEngine) { - engines_[engine_name].engine = new python_engine::PythonEngine(); - CTL_INF("Loaded engine: " << engine_name); - return {}; - } - // Check for remote engine if (IsRemoteEngine(engine_name)) { auto exist_engine = GetEngineByNameAndVariant(engine_name); @@ -911,12 +902,6 @@ cpp::result 
EngineService::IsEngineReady( return true; } - // End hard code - // Check for python engine - if (engine == kPythonEngine) { - return true; - } - auto os = hw_inf_.sys_inf->os; auto installed_variants = GetInstalledEngineVariants(engine); diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index f98037bab..830944aee 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -9,7 +9,6 @@ #include "common/engine_servicei.h" #include "cortex-common/EngineI.h" -#include "cortex-common/cortexpythoni.h" #include "cortex-common/remote_enginei.h" #include "database/engines.h" #include "services/database_service.h" @@ -37,7 +36,7 @@ struct EngineUpdateResult { } }; -using EngineV = std::variant; +using EngineV = std::variant; class EngineService : public EngineServiceI { private: diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 0a52665ad..e4b3853e3 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -139,64 +139,6 @@ cpp::result InferenceService::HandleEmbedding( return {}; } -cpp::result InferenceService::HandleInference( - std::shared_ptr q, std::shared_ptr json_body) { - std::string engine_type; - if (!HasFieldInReq(json_body, "engine")) { - engine_type = kLlamaRepo; - } else { - engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); - } - - auto engine_result = engine_service_->GetLoadedEngine(engine_type); - if (engine_result.has_error()) { - Json::Value res; - Json::Value stt; - res["message"] = "Engine is not loaded yet"; - stt["status_code"] = drogon::k400BadRequest; - LOG_WARN << "Engine is not loaded yet"; - return cpp::fail(std::make_pair(stt, res)); - } - - auto cb = [q](Json::Value status, Json::Value res) { - q->push(std::make_pair(status, res)); - }; - if (std::holds_alternative(engine_result.value())) { - std::get(engine_result.value()) - ->HandleInference(json_body, std::move(cb)); - } - return {}; 
-} - -cpp::result InferenceService::HandleRouteRequest( - std::shared_ptr q, std::shared_ptr json_body) { - std::string engine_type; - if (!HasFieldInReq(json_body, "engine")) { - engine_type = kLlamaRepo; - } else { - engine_type = (*(json_body)).get("engine", kLlamaRepo).asString(); - } - - auto engine_result = engine_service_->GetLoadedEngine(engine_type); - if (engine_result.has_error()) { - Json::Value res; - Json::Value stt; - res["message"] = "Engine is not loaded yet"; - stt["status_code"] = drogon::k400BadRequest; - LOG_WARN << "Engine is not loaded yet"; - return cpp::fail(std::make_pair(stt, res)); - } - - auto cb = [q](Json::Value status, Json::Value res) { - q->push(std::make_pair(status, res)); - }; - if (std::holds_alternative(engine_result.value())) { - std::get(engine_result.value()) - ->HandleRouteRequest(json_body, std::move(cb)); - } - return {}; -} - InferResult InferenceService::LoadModel( std::shared_ptr json_body) { std::string engine_type; @@ -348,56 +290,6 @@ InferResult InferenceService::GetModels( return std::make_pair(stt, root); } -InferResult InferenceService::FineTuning( - std::shared_ptr json_body) { - std::string ne = kPythonRuntimeRepo; - Json::Value r; - Json::Value stt; - - // TODO: namh refactor this - // if (engines_.find(ne) == engines_.end()) { - // try { - // std::string abs_path = - // (getenv("ENGINE_PATH") - // ? 
getenv("ENGINE_PATH") - // : file_manager_utils::GetCortexDataPath().string()) + - // kPythonRuntimeLibPath; - // engines_[ne].dl = std::make_unique(abs_path, "engine"); - // } catch (const cortex_cpp::dylib::load_error& e) { - // - // LOG_ERROR << "Could not load engine: " << e.what(); - // engines_.erase(ne); - // - // Json::Value res; - // r["message"] = "Could not load engine " + ne; - // stt["status_code"] = drogon::k500InternalServerError; - // return std::make_pair(stt, r); - // } - // - // auto func = - // engines_[ne].dl->get_function("get_engine"); - // engines_[ne].engine = func(); - // LOG_INFO << "Loaded engine: " << ne; - // } - // - // LOG_TRACE << "Start to fine-tuning"; - // auto& en = std::get(engines_[ne].engine); - // if (en->IsSupported("HandlePythonFileExecutionRequest")) { - // en->HandlePythonFileExecutionRequest( - // json_body, [&r, &stt](Json::Value status, Json::Value res) { - // r = res; - // stt = status; - // }); - // } else { - // LOG_WARN << "Method is not supported yet"; - r["message"] = "Method is not supported yet"; - stt["status_code"] = drogon::k500InternalServerError; - // return std::make_pair(stt, r); - // } - // LOG_TRACE << "Done fine-tuning"; - return std::make_pair(stt, r); -} - bool InferenceService::StopInferencing(const std::string& engine_name, const std::string& model_id) { CTL_DBG("Stop inferencing"); diff --git a/engine/services/inference_service.h b/engine/services/inference_service.h index 726275bba..119013f5f 100644 --- a/engine/services/inference_service.h +++ b/engine/services/inference_service.h @@ -42,12 +42,6 @@ class InferenceService { cpp::result HandleEmbedding( std::shared_ptr q, std::shared_ptr json_body); - cpp::result HandleInference( - std::shared_ptr q, std::shared_ptr json_body); - - cpp::result HandleRouteRequest( - std::shared_ptr q, std::shared_ptr json_body); - InferResult LoadModel(std::shared_ptr json_body); InferResult UnloadModel(const std::string& engine, @@ -57,8 +51,6 @@ class 
InferenceService { InferResult GetModels(std::shared_ptr json_body); - InferResult FineTuning(std::shared_ptr json_body); - bool StopInferencing(const std::string& engine_name, const std::string& model_id); diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index cc735d006..53abf70a9 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -527,59 +527,8 @@ ModelService::DownloadModelFromCortexsoAsync( config::YamlHandler yaml_handler; yaml_handler.ModelConfigFromFile(model_yml_item->localPath.string()); auto mc = yaml_handler.GetModelConfig(); - if (mc.engine == kPythonEngine) { // process for Python engine - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml(model_yml_item->localPath.string()); - python_model_config.files.push_back( - model_yml_item->localPath.parent_path().string()); - python_model_config.ToYaml(model_yml_item->localPath.string()); - // unzip venv.zip - auto model_folder = model_yml_item->localPath.parent_path(); - auto venv_path = model_folder / std::filesystem::path("venv"); - if (!std::filesystem::exists(venv_path)) { - std::filesystem::create_directories(venv_path); - } - auto venv_zip = model_folder / std::filesystem::path("venv.zip"); - if (std::filesystem::exists(venv_zip)) { - if (archive_utils::ExtractArchive(venv_zip.string(), - venv_path.string())) { - std::filesystem::remove_all(venv_zip); - CTL_INF("Successfully extract venv.zip"); - // If extract success create pyvenv.cfg - std::ofstream pyvenv_cfg(venv_path / - std::filesystem::path("pyvenv.cfg")); -#ifdef _WIN32 - pyvenv_cfg << "home = " - << (venv_path / std::filesystem::path("Scripts")).string() - << std::endl; - pyvenv_cfg << "executable = " - << (venv_path / std::filesystem::path("Scripts") / - std::filesystem::path("python.exe")) - .string() - << std::endl; -#else - pyvenv_cfg << "home = " - << (venv_path / std::filesystem::path("bin/")).string() - << std::endl; - pyvenv_cfg - << 
"executable = " - << (venv_path / std::filesystem::path("bin/python")).string() - << std::endl; -#endif - // Close the file - pyvenv_cfg.close(); - // Add executable permission to python - (void)set_permission_utils::SetExecutePermissionsRecursive(venv_path); - } else { - CTL_ERR("Failed to extract venv.zip"); - }; - - } else { - CTL_ERR( - "venv.zip not found in model folder: " << model_folder.string()); - } - } else { + if (mc.engine == kLlamaEngine) { mc.model = unique_model_id; uint64_t model_size = 0; @@ -836,62 +785,6 @@ cpp::result ModelService::StartModel( .string()); auto mc = yaml_handler.GetModelConfig(); - // Check if Python model first - if (mc.engine == kPythonEngine) { - - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml( - - fmu::ToAbsoluteCortexDataPath( - fs::path(model_entry.value().path_to_model_yaml)) - .string()); - // Start all depends model - auto depends = python_model_config.depends; - for (auto& depend : depends) { - Json::Value temp; - auto res = StartModel(depend, temp, false); - if (res.has_error()) { - CTL_WRN("Error: " + res.error()); - for (auto& depend : depends) { - if (depend != model_handle) { - auto sr = StopModel(depend); - } - } - return cpp::fail("Model failed to start dependency '" + depend + - "' : " + res.error()); - } - } - - json_data["model"] = model_handle; - json_data["model_path"] = - fmu::ToAbsoluteCortexDataPath( - fs::path(model_entry.value().path_to_model_yaml)) - .string(); - json_data["engine"] = mc.engine; - assert(!!inference_svc_); - // Check if python engine - - auto ir = - inference_svc_->LoadModel(std::make_shared(json_data)); - auto status = std::get<0>(ir)["status_code"].asInt(); - auto data = std::get<1>(ir); - - if (status == drogon::k200OK) { - return StartModelResult{.success = true, .warning = ""}; - } else if (status == drogon::k409Conflict) { - CTL_INF("Model '" + model_handle + "' is already loaded"); - return StartModelResult{.success = true, .warning = ""}; - 
} else { - // only report to user the error - for (auto& depend : depends) { - (void)StopModel(depend); - } - } - CTL_ERR("Model failed to start with status code: " << status); - return cpp::fail("Model failed to start: " + - data["message"].asString()); - } - // Running remote model if (engine_svc_->IsRemoteEngine(mc.engine)) { (void)engine_svc_->LoadEngine(mc.engine); @@ -1002,7 +895,6 @@ cpp::result ModelService::StartModel( } assert(!!inference_svc_); - // Check if python engine auto ir = inference_svc_->LoadModel(std::make_shared(json_data)); @@ -1068,21 +960,6 @@ cpp::result ModelService::StopModel( engine_name = kLlamaEngine; } - // Update for python engine - if (engine_name == kPythonEngine) { - auto model_entry = db_service_->GetModelInfo(model_handle); - config::PythonModelConfig python_model_config; - python_model_config.ReadFromYaml( - fmu::ToAbsoluteCortexDataPath( - fs::path(model_entry.value().path_to_model_yaml)) - .string()); - // Stop all depends model - auto depends = python_model_config.depends; - for (auto& depend : depends) { - (void)StopModel(depend); - } - } - // assert(inference_svc_); auto ir = inference_svc_->UnloadModel(engine_name, model_handle); diff --git a/engine/utils/config_yaml_utils.h b/engine/utils/config_yaml_utils.h index c94b8fe5f..193777c70 100644 --- a/engine/utils/config_yaml_utils.h +++ b/engine/utils/config_yaml_utils.h @@ -24,8 +24,7 @@ constexpr const auto kDefaultCorsEnabled = true; const std::vector kDefaultEnabledOrigins{ "http://localhost:39281", "http://127.0.0.1:39281", "http://0.0.0.0:39281"}; constexpr const auto kDefaultNoProxy = "example.com,::1,localhost,127.0.0.1"; -const std::vector kDefaultSupportedEngines{kLlamaEngine, - kPythonEngine}; +const std::vector kDefaultSupportedEngines{kLlamaEngine}; struct CortexConfig { std::string logFolderPath; diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h index 7bacf2249..4f560131f 100644 --- a/engine/utils/engine_constants.h +++ 
b/engine/utils/engine_constants.h @@ -1,17 +1,14 @@ #pragma once constexpr const auto kLlamaEngine = "llama-cpp"; -constexpr const auto kPythonEngine = "python-engine"; constexpr const auto kRemote = "remote"; constexpr const auto kLocal = "local"; constexpr const auto kLlamaRepo = "cortex.llamacpp"; -constexpr const auto kPythonRuntimeRepo = "cortex.python"; constexpr const auto kLlamaLibPath = "./engines/cortex.llamacpp"; -constexpr const auto kPythonRuntimeLibPath = "/engines/cortex.python"; // other constants constexpr auto static kHuggingFaceHost = "huggingface.co"; From 9b0e34bdcd692d6a29c8b7a4309fbbd1c0cbd67a Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 20 Mar 2025 17:04:13 +0700 Subject: [PATCH 2/4] feat: allow to configure api_keys by cli (#2154) * feat: allow to configure api_keys by cli * fix: typo * chore: add comment --------- Co-authored-by: sangjanai --- engine/cli/command_line_parser.cc | 2 +- engine/cli/commands/config_upd_cmd.cc | 35 ++++++++++++++++++------ engine/common/api_server_configuration.h | 9 ++++++ engine/main.cc | 6 ++++ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/engine/cli/command_line_parser.cc b/engine/cli/command_line_parser.cc index b423a6896..834d9775b 100644 --- a/engine/cli/command_line_parser.cc +++ b/engine/cli/command_line_parser.cc @@ -437,7 +437,7 @@ void CommandLineParser::SetupConfigsCommands() { auto is_empty = true; for (const auto& [key, value] : config_update_opts_) { - if (!value.empty()) { + if (!value.empty() || key == "api_keys") { is_empty = false; break; } diff --git a/engine/cli/commands/config_upd_cmd.cc b/engine/cli/commands/config_upd_cmd.cc index 58bedb2e5..3abdbac83 100644 --- a/engine/cli/commands/config_upd_cmd.cc +++ b/engine/cli/commands/config_upd_cmd.cc @@ -2,6 +2,7 @@ #include "commands/server_start_cmd.h" #include "common/api_server_configuration.h" #include "utils/curl_utils.h" +#include "utils/file_manager_utils.h" #include "utils/logging_utils.h" #include 
"utils/string_utils.h" #include "utils/url_parser.h" @@ -46,22 +47,40 @@ inline Json::Value NormalizeJson( void commands::ConfigUpdCmd::Exec( const std::string& host, int port, const std::unordered_map& options) { - if (!commands::IsServerAlive(host, port)) { - CLI_LOG("Starting server ..."); - commands::ServerStartCmd ssc; - if (!ssc.Exec(host, port)) { - return; - } - } auto non_null_opts = std::unordered_map(); for (const auto& [key, value] : options) { - if (value.empty()) { + // In case of api_keys, we allow empty value + if (value.empty() && key != "api_keys") { continue; } non_null_opts[key] = value; } + if (non_null_opts.size() == 1) { + for (const auto& [key, value] : non_null_opts) { + if (key == "api_keys") { + auto config = file_manager_utils::GetCortexConfig(); + config.apiKeys = string_utils::SplitBy(value, ","); + auto result = file_manager_utils::UpdateCortexConfig(config); + if (result.has_error()) { + CLI_LOG_ERROR(result.error()); + } else { + CLI_LOG("Configuration updated successfully!"); + } + return; + } + } + } + + if (!commands::IsServerAlive(host, port)) { + CLI_LOG("Starting server ..."); + commands::ServerStartCmd ssc; + if (!ssc.Exec(host, port)) { + return; + } + } + auto url = url_parser::Url{ .protocol = "http", .host = host + ":" + std::to_string(port), diff --git a/engine/common/api_server_configuration.h b/engine/common/api_server_configuration.h index 63383301b..b3de92c65 100644 --- a/engine/common/api_server_configuration.h +++ b/engine/common/api_server_configuration.h @@ -97,6 +97,15 @@ static const std::unordered_map .accept_value = "string", .default_value = "", .allow_empty = true}}, + {"api_keys", + ApiConfigurationMetadata{ + .name = "api_keys", + .desc = "API header key to get access to server APIs", + .group = "Token", + .accept_value = "comma separated", + .default_value = "", + .allow_empty = true}}, + }; class ApiServerConfiguration { diff --git a/engine/main.cc b/engine/main.cc index d407726e0..623e941a1 100644 --- 
a/engine/main.cc +++ b/engine/main.cc @@ -255,6 +255,12 @@ void RunServer(std::optional host, std::optional port, static const std::unordered_set public_endpoints = { "/openapi.json", "/healthz", "/processManager/destroy"}; + if (req->getHeader("Authorization").empty() && + req->path() == "/v1/configs") { + CTL_WRN("Require API key to access /v1/configs"); + return false; + } + // If API key is not set, skip validation if (api_keys.empty()) { return true; From 8404e54c102c6d432da96e3f9225f986c4d59ca1 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 20 Mar 2025 18:07:45 +0700 Subject: [PATCH 3/4] feat: add option to start server with --api_keys (#2156) Co-authored-by: sangjanai --- engine/main.cc | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/engine/main.cc b/engine/main.cc index 623e941a1..c95c05d82 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -260,7 +260,7 @@ void RunServer(std::optional host, std::optional port, CTL_WRN("Require API key to access /v1/configs"); return false; } - + // If API key is not set, skip validation if (api_keys.empty()) { return true; @@ -383,6 +383,7 @@ void print_help() { "~/cortexcpp)\n"; std::cout << " --host Host name (default: 127.0.0.1)\n"; std::cout << " --port Port number (default: 39281)\n"; + std::cout << " --api_keys Keys to access API endpoints\n"; std::cout << " --ignore_cout Ignore cout output\n"; std::cout << " --loglevel Set log level\n"; @@ -411,6 +412,7 @@ int main(int argc, char* argv[]) { std::optional server_host; std::optional server_port; + std::optional api_keys; bool ignore_cout_log = false; #if defined(_WIN32) for (int i = 0; i < argc; i++) { @@ -427,6 +429,8 @@ int main(int argc, char* argv[]) { server_host = cortex::wc::WstringToUtf8(argv[i + 1]); } else if (command == L"--port") { server_port = std::stoi(argv[i + 1]); + } else if (command == L"--api_keys") { + api_keys = cortex::wc::WstringToUtf8(argv[i + 1]); } else if (command == L"--ignore_cout") {
ignore_cout_log = true; } else if (command == L"--loglevel") { @@ -447,6 +451,8 @@ int main(int argc, char* argv[]) { server_host = argv[i + 1]; } else if (strcmp(argv[i], "--port") == 0) { server_port = std::stoi(argv[i + 1]); + } else if (strcmp(argv[i], "--api_keys") == 0) { + api_keys = argv[i + 1]; } else if (strcmp(argv[i], "--ignore_cout") == 0) { ignore_cout_log = true; } else if (strcmp(argv[i], "--loglevel") == 0) { @@ -482,6 +488,15 @@ int main(int argc, char* argv[]) { } } + if (api_keys) { + auto config = file_manager_utils::GetCortexConfig(); + config.apiKeys = string_utils::SplitBy(*api_keys, ","); + auto result = file_manager_utils::UpdateCortexConfig(config); + if (result.has_error()) { + CTL_ERR(result.error()); + } + } + // check if migration is needed if (auto res = cortex::migr::MigrationManager( cortex::db::Database::GetInstance().db()) From e79abcb60c5a2a6778e6441d6d44ca2df1a1eb11 Mon Sep 17 00:00:00 2001 From: vansangpfiev Date: Thu, 20 Mar 2025 18:37:24 +0700 Subject: [PATCH 4/4] fix: set /events as public endpoint (#2157) Co-authored-by: sangjanai --- engine/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/main.cc b/engine/main.cc index c95c05d82..1ce173a7a 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -253,7 +253,7 @@ void RunServer(std::optional host, std::optional port, auto validate_api_key = [config_service](const drogon::HttpRequestPtr& req) { auto api_keys = config_service->GetApiServerConfiguration()->api_keys; static const std::unordered_set public_endpoints = { - "/openapi.json", "/healthz", "/processManager/destroy"}; + "/openapi.json", "/healthz", "/processManager/destroy", "/events"}; if (req->getHeader("Authorization").empty() && req->path() == "/v1/configs") {