- defog/llama-3-sqlcoder-8b
- meta-llama/Llama-3.2-1B-Instruct
- microsoft/Phi-3.5-mini-instruct
- google/gemma-2-2b-it
# Deploy with Docker on Linux using Hugging Face text-generation-inference.
# Requires HF_TOKEN (Hugging Face access token) and MODEL (model id) to be
# set in the environment. The host HF cache is mounted so model weights are
# reused across runs; the server inside the container listens on port 80 and
# is exposed on host port 8000.
docker run --gpus all \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  -e HF_TOKEN="$HF_TOKEN" \
  -p 8000:80 \
  ghcr.io/huggingface/text-generation-inference:latest \
  --model-id "$MODEL"
- hf.co/defog/sqlcoder-7b-2
# Start the Ollama server in a detached container with GPU access, persisting
# downloaded models in the "ollama" named volume and exposing the Ollama API
# on port 11434.
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
# Pull and run the SQLCoder model from Hugging Face inside the running container.
docker exec -it ollama ollama run hf.co/defog/sqlcoder-7b-2
- Run the application and model on Nvidia A100 or H100 GPUs.
- macOS or x86/Nvidia-based machines should have enough GPU memory to support the models.
# Set your Hugging Face access token. The placeholder is quoted: unquoted
# angle brackets would be parsed by the shell as redirection operators and
# the embedded space would split the assignment, producing a syntax error.
export HF_TOKEN="<YOUR TOKEN>"
# Build the demo images, then start the stack detached in the background.
docker compose -f docker-compose.demo.yml build
docker compose -f docker-compose.demo.yml up -d