---
# ╔══════════════════════════════════════════════════════════╗
# ║              ProjectGabriel Configuration                ║
# ╚══════════════════════════════════════════════════════════╝
# Copy this file to config.yml and fill in your values.
# Display name used in the WebUI, console logs, OBS overlays, and chatbox.
app_name: "Gabriel"
# Which brain to run. Two options:
#   gemini_live - cloud Gemini Live websocket (the original setup, native voice)
#   local       - LM Studio for the LLM + Moonshine v2 for STT + your chosen
#                 external TTS provider (qwen3 / hoppou / chirp3_hd / tiktok / plugin)
# Local mode is fully offline aside from a few short flash-lite sub-agents
# (memory recall, conversation summary). Local mode REQUIRES one of the
# external TTS providers under tts:* to be enabled, since LM Studio has no
# native voice.
backend: "gemini_live"
# Privacy controls. Off by default. See README "Privacy" section.
# NOTE(review): the indented setting below had no parent mapping key, which
# is invalid YAML. The name `privacy` is presumed from this section's
# heading -- confirm against the config loader.
privacy:
  # When true, every Gemini Live session writes a JSON transcript of the
  # whole conversation (user transcripts, assistant transcripts, tool
  # calls, tool responses) to data/conversations/<timestamp>.json. Off by
  # default. Turn on if you want a personal log to grep / replay later,
  # leave off if you do not want any of it on disk.
  save_conversations: false
# Logging. Controls how chatty the terminal is.
# NOTE(review): the parent key `logging` is presumed from this section's
# heading (it was missing, leaving `level` orphaned) -- confirm against the
# config loader.
logging:
  # DEBUG / INFO / WARNING / ERROR. Use DEBUG to see everything including
  # third party libs (httpx, discord internals, etc), INFO is the everyday
  # level, WARNING hides per-event status spam.
  level: "INFO"
# Gemini Live settings. Other comments in this file address these keys as
# gemini.* (e.g. gemini.vad.silero_threshold), so they nest under `gemini`.
gemini:
  # Primary API key (required) - get from https://aistudio.google.com/apikey
  api_key: "YOUR_GEMINI_API_KEY_HERE"
  # Backup keys for automatic rotation when rate limited (optional)
  # The system will cycle through these when the primary key hits quota limits
  backup_keys: []
  #  - "BACKUP_KEY_1"
  #  - "BACKUP_KEY_2"
  #  - "BACKUP_KEY_3"
  # Gemini Live model - must support native audio
  # Options: "gemini-3.1-flash-live-preview" (latest, lower latency)
  #          "gemini-2.5-flash-native-audio-preview-09-2025" (legacy)
  #          "gemini-2.5-flash-native-audio-preview-12-2025" (legacy)
  model: "gemini-2.5-flash-native-audio-preview-09-2025"
  # System prompt -- select a named prompt from config/prompts/prompts.yml
  prompt: "default"
  # ── Voice Configuration ──
  # Prebuilt voice name. Available voices:
  #   Puck, Charon, Kore, Fenrir, Aoede, Leda, Orus, Zephyr
  voice: "Puck"
  # ── Voice Activity Detection (VAD) ──
  # Controls how Gemini detects when you start/stop speaking.
  # Two modes, selected with `mode` below:
  #   1. Automatic (mode: "auto") - Server-side VAD handles everything.
  #      Simple, works well for most setups. Just configure sensitivity.
  #   2. Manual (mode: "silero") - Client-side VAD using a local Silero model.
  #      Sends activityStart/activityEnd signals to Gemini manually.
  #      Recommended for production apps (per Google AI community patterns).
  #      Gives you full control over turn-taking and prevents echo issues.
  #      Also gates audio during tool calls and model speech to avoid
  #      1007/1008 disconnects from stale audio.
  vad:
    # VAD mode: "auto" (Gemini server-side) or "silero" (local Silero VAD model)
    #   auto   = Uses Gemini's built-in automatic activity detection. Simple, no extra deps.
    #   silero = Uses Silero VAD locally for speech detection with activityStart/activityEnd.
    #            More stable on 3.1 models, prevents stalls from noise. Requires torch.
    #            Also gates audio during tool calls and model speech to avoid
    #            1007/1008 disconnects from stale audio leaking to the server.
    mode: "auto"
    # Sensitivity for detecting the START of speech (auto mode only)
    # Options: START_SENSITIVITY_LOW, START_SENSITIVITY_HIGH
    # HIGH = triggers faster but may pick up noise, LOW = more selective
    start_of_speech_sensitivity: "START_SENSITIVITY_HIGH"
    # Sensitivity for detecting the END of speech (auto mode only)
    # Options: END_SENSITIVITY_LOW, END_SENSITIVITY_HIGH
    # HIGH = ends speech detection quicker, LOW = waits longer
    end_of_speech_sensitivity: "END_SENSITIVITY_HIGH"
    # Milliseconds of audio to capture BEFORE speech is detected (context padding)
    prefix_padding_ms: 100
    # Milliseconds of silence required to consider speech complete
    # Used in both auto and silero mode. The 200 set below favors snappy
    # turn-taking; raise it (e.g. to 500) if speakers get cut off during
    # natural pauses.
    silence_duration_ms: 200
    # Speech probability threshold for Silero VAD (silero mode only)
    # Silero outputs a 0.0-1.0 probability per audio chunk.
    # Chunks above this threshold are considered speech.
    # Default 0.5 is a good starting point. Lower = more sensitive, higher = stricter.
    # silero_threshold: 0.5
  # ── Generation Parameters ──
  temperature: 1.0     # 0.0-2.0, controls randomness (default: 1.0, higher = more creative)
  # ── Thinking Configuration ──
  # Enables the model to "think" before responding (inner monologue).
  # Requires a model that supports thinking (e.g. gemini-2.5-flash-native-audio-preview-12-2025).
  # Dynamic thinking is enabled by default on supported models.
  thinking:
    # 2.5 models: token budget that guides how many tokens the model spends
    # thinking. Larger budgets give more thorough but slower responses,
    # 0 disables thinking, and null keeps the model default (dynamic thinking).
    budget: null
    # 3.1 models: thinking depth, one of "minimal" (lowest latency), "low",
    # "medium", "high". Defaults to "minimal" on 3.1 models and is ignored
    # on 2.5 models.
    # level: "low"
    # Set to true to include thought summaries in server responses, which
    # helps when debugging what the model is reasoning about.
    include_thoughts: false
  # ── Advanced Live Features (require v1alpha API, 2.5 models only, auto-fallback if unsupported) ──
  # These are NOT supported on 3.1 models and will be automatically skipped.
  # enable_affective_dialog: true   # Emotional/expressive responses
  # proactivity: null                # Model proactivity level (float), null = default
  # ── Google Search Grounding ──
  # Enable Google Search tool for the model to look up current information.
  # Auto-detection: if not set, enabled for 2.5 models, disabled for 3.1 (quota issues).
  # Set explicitly to override: true = always on, false = always off.
  # google_search: true
  # ── Context Window Compression ──
  # Prevents session termination when context gets too large.
  # Without compression: audio-only 15 min, audio+video 2 min.
  # With compression: unlimited session length.
  # NOTE: These settings are fully disabled when custom_compression is enabled.
  # No built-in compression config is sent to Gemini in that case.
  context_window_compression:
    enabled: true
    # Compression starts once the context reaches this many tokens
    # (null = 80% of the model's 128k context window).
    trigger_tokens: 50000
    # Number of tokens retained after compression has run
    # (null = trigger_tokens / 2).
    target_tokens: 25000
  # ── Custom Context Compression ──
  # When enabled, replaces Gemini's built-in sliding window compression with a
  # smarter approach: when tokens near the threshold, a lightweight model summarizes
  # the conversation, clears the session, and seeds the summary as initial context.
  # This avoids repeated 1007 errors that Gemini's built-in compression can cause.
  custom_compression:
    enabled: false
    # The summarize-and-reconnect cycle starts at this token count.
    trigger_tokens: 100000
    # Lightweight, fast model that writes the summary.
    model: "gemini-3.1-flash-lite"
  # ── Session Resumption ──
  # Controls how aggressively session handles are cleared on errors and
  # how many recent messages are replayed as context on fresh reconnects.
  # NOTE(review): the two settings below were indented one level deeper with
  # no parent key, which is invalid YAML. The name `session_resumption` is
  # presumed from this heading -- confirm against the config loader.
  session_resumption:
    # Number of consecutive errors (e.g. 1007) before clearing session handle and starting fresh.
    # Lower = faster recovery but discards handle sooner. Default: 1.
    error_threshold: 1
    # Number of recent user/assistant messages to replay as context on fresh reconnect.
    # These are sent to the new session so the model knows what was being discussed.
    replay_messages: 10
# ══════════════════════════════════════════════════════════
# Local backend (LM Studio + Moonshine + external TTS)
# ══════════════════════════════════════════════════════════
# Only used when backend: local at the top of this file. Otherwise ignored.
# Requirements:
#   1. LM Studio running with a chat model loaded and server enabled
#      (Developer tab -> Start Server). default endpoint is the one below.
#   2. One of the external TTS providers below (tts.qwen3 / hoppou /
#      chirp3_hd / tiktok or a plugin) enabled. local mode refuses to
#      start without a TTS provider since the LLM has no voice of its own.
#   3. Moonshine Voice installed (pip install moonshine-voice). First
#      run downloads the chosen streaming arch into the user cache.
    # OpenAI compatible endpoint. LM Studio defaults to localhost:1234.
    base_url: "http://localhost:1234/v1"
    # Model identifier as shown in LM Studio. The default placeholder works
    # because LM Studio routes to whatever is loaded, but be explicit when
    # you can.
    model: "local-model"
    api_key: "lm-studio"
    temperature: 0.8
    top_p: 0.95
    max_tokens: 1024
    # Rolling chat history kept in context across turns. Higher = more
    # memory but more tokens per request.
    history_messages: 30
    # Per-request timeout in seconds.
    request_timeout: 120
    # Safety cap on tool call -> response -> tool call -> response loops in
    # a single user turn. If the model wants more iterations than this we
    # stop and let it speak.
    max_tool_iterations: 6
    # When true, captures the screen and attaches an image to every user
    # turn. Requires a multimodal model loaded in LM Studio (eg qwen2.5-vl).
    enabled: false
    # Longest edge of the resized image. Smaller = faster, fewer tokens.
    max_size: 768
    # JPEG quality (1-100) for the encoded image.
    quality: 70
    # Moonshine streaming model arch. Picks weights via moonshine_voice.
    # Options:
    #   tiny_streaming    34M params, ~69ms TTFT, lowest accuracy
    #   small_streaming   123M params, ~165ms TTFT, recommended default
    #   medium_streaming  245M params, ~269ms TTFT, best accuracy
    model: "small_streaming"
    # Language tag passed to moonshine_voice.download_model. Only English
    # has full streaming weights; other languages fall back to base v2.
    language: "en"
    # Drop any speech segment shorter than this. Filters out keyboard
    # clicks, coughs, brief mouth noises.
    min_speech_ms: 400
    # Hard cap on a single utterance so a noisy room can't grow the buffer
    # forever. The utterance is force-finalised at this length.
    max_utterance_ms: 30000
    # Keep this much audio from BEFORE VAD triggered so we never clip the
    # first phoneme. 200-400ms is the sweet spot.
    pre_roll_ms: 300
    # Silero VAD speech-probability threshold (0.0-1.0). Lower = more
    # sensitive (picks up quieter speech, more false triggers). Higher =
    # stricter (might miss soft talkers). Defaults to gemini.vad.silero_threshold
    # if unset, which itself defaults to 0.5. Try 0.3 if Gabriel keeps
    # missing your voice, 0.6+ if the mic triggers on background noise.
    vad_threshold: 0.4
    # How many ms of silence before we close the utterance and send it to
    # transcription. Lower = snappier turn-taking but cuts off mid-sentence
    # pauses. Higher = more patient. Falls back to gemini.vad.silence_duration_ms.
    silence_ms: 600
# ══════════════════════════════════════════════════════════
# TTS Provider
# ══════════════════════════════════════════════════════════
# Choose which voice to use for AI speech output.
# "gemini"    = use Gemini Live's native audio (default)
# "qwen3"     = discard Gemini audio, use Qwen3 TTS server instead
# "hoppou"    = discard Gemini audio, use Hoppou AI cloud TTS
# "chirp3_hd" = discard Gemini audio, use Google Cloud Chirp 3: HD streaming TTS
# "tiktok"    = discard Gemini audio, use TikTok's TTS API (free, no API key needed)
# All provider settings nest under `tts` (the local-backend notes in this
# file refer to them as tts.qwen3, tts.hoppou, and so on).
tts:
  provider: "gemini"
  # Providers the AI can switch to at runtime via the switchTTSProvider tool.
  # Must be a list of provider names: "gemini", "qwen3", "hoppou", "chirp3_hd", "tiktok"
  switchable_providers: ["gemini"]
  # Qwen3 TTS server settings (only used when provider is "qwen3")
  qwen3:
    # URL of your Qwen3 TTS server
    base_url: "http://localhost:7860"
    # TTS mode: "voice_clone", "custom", or "voice_design"
    mode: "voice_clone"
    # Language for synthesis
    language: "English"
    # ── Voice Clone settings ──
    # Use a preset reference voice (from server's preset list)
    ref_preset: ""
    # OR provide a local reference audio file + transcript
    ref_audio: ""
    ref_text: ""
    # x-vector only (faster) vs full clone (more accurate)
    xvec_only: true
    # ── Custom Voice settings ──
    # Speaker name (for "custom" mode, requires CustomVoice model)
    speaker: ""
    # ── Voice Design / Custom instruct ──
    # Text description of desired voice (for "voice_design" or "custom" mode)
    instruct: ""
    # ── Generation parameters ──
    # Streaming chunk size (tokens per chunk)
    chunk_size: 8
    temperature: 0.9
    top_k: 50
    repetition_penalty: 1.05
  # Hoppou AI cloud TTS settings (only used when provider is "hoppou")
  hoppou:
    # API endpoint
    api_url: "https://api.hoppou.ai/tts"
    # API key (get one at https://hoppou.ai/api-portal/)
    api_key: ""
    # Voice preset name
    voice: "alba"
    # Model name
    model: "tts-1"
  # Google Cloud Chirp 3: HD TTS settings (only used when provider is "chirp3_hd")
  # Uses the streaming API for low-latency synthesis with Google's latest HD voices.
  # Requires google-cloud-texttospeech package.
  chirp3_hd:
    # Google Cloud TTS API key (create one at
    # https://console.cloud.google.com/apis/credentials).
    # An empty value falls back to Application Default Credentials (ADC).
    api_key: ""
    # Extra keys rotated in automatically on rate limit (429) errors.
    # Free tier: ~500k chars/month; once exhausted, the next key is used.
    backup_keys: []
    #  - "BACKUP_CLOUD_KEY_1"
    #  - "BACKUP_CLOUD_KEY_2"
    # Voice name -- full list at
    # https://docs.cloud.google.com/text-to-speech/docs/chirp3-hd
    # Available: Achernar, Achird, Algenib, Algieba, Alnilam, Aoede, Autonoe,
    #   Callirrhoe, Charon, Despina, Enceladus, Erinome, Fenrir, Gacrux, Iapetus,
    #   Kore, Laomedeia, Leda, Orus, Puck, Pulcherrima, Rasalgethi, Sadachbia,
    #   Sadaltager, Schedar, Sulafat, Umbriel, Vindemiatrix, Zephyr, Zubenelgenubi
    voice: "Kore"
    # BCP-47 style language code, e.g. en-US, ja-JP, de-DE, fr-FR
    language_code: "en-US"
    # Speech speed multiplier: 0.25 to 2.0, where 1.0 is normal
    speaking_rate: 1.0
  # TikTok TTS settings (only used when provider is "tiktok")
  # Uses the free community Weilbyte TikTok TTS proxy (no auth needed).
  # See: https://github.com/Weilbyte/tiktok-tts
  tiktok:
    # Voice code to use. Full list: https://github.com/oscie57/tiktok-voice/wiki/Voice-Codes
    # Popular voices:
    #   en_us_001   English US Female 1          en_us_006   English US Male 1
    #   en_us_002   English US Female 2          en_us_007   English US Male 2
    #   en_us_009   English US Male 3            en_us_010   English US Male 4
    #   en_uk_001   English UK Male 1            en_uk_003   English UK Male 2
    #   en_au_001   English AU Female             en_au_002   English AU Male
    #   en_male_narration   Narrator             en_male_funny   Wacky
    #   en_female_emotional   Peaceful           en_male_cody   Serious
    # Disney voices:
    #   en_us_ghostface   Ghost Face             en_us_chewbacca   Chewbacca
    #   en_us_c3po   C3PO                        en_us_stitch   Stitch
    #   en_us_rocket   Rocket                    en_us_stormtrooper   Stormtrooper
    #   en_female_madam_leota   Madame Leota     en_male_ghosthost   Ghost Host
    #   en_male_pirate   Pirate
    # Singing voices:
    #   en_female_f08_salut_damour   Alto         en_male_m03_lobby   Tenor
    #   en_male_m03_sunshine_soon   Sunshine Soon
    #   en_female_f08_warmy_breeze   Warmy Breeze
    #   en_female_ht_f08_glorious   Glorious
    #   en_male_sing_funny_it_goes_up   It Goes Up
    #   en_male_m2_xhxs_m03_silly   Chipmunk
    #   en_female_ht_f08_wonderful_world   Dramatic
    # Other languages:
    #   fr_001 / fr_002   French                 de_001 / de_002   German
    #   es_002   Spanish                         es_mx_002   Spanish MX
    #   br_001 / br_003 / br_004 / br_005   Portuguese BR
    #   jp_001 / jp_003 / jp_005 / jp_006   Japanese
    #   kr_002 / kr_003 / kr_004   Korean
    #   id_001   Indonesian
    voice: "en_us_001"
  # Audio device indices - null uses system default
  # Run `python -c "import pyaudio; p=pyaudio.PyAudio(); [print(i, p.get_device_info_by_index(i)['name']) for i in range(p.get_device_count())]"` to list devices
  input_device: null
  output_device: null
  # Sample rates (don't change unless you know what you're doing)
  send_sample_rate: 16000
  receive_sample_rate: 24000
  chunk_size: 1024
  # Thinking sound effect -- plays a looping ambient sound while the AI is thinking
  # or recalling memories. Fades in when thinking starts, fades out when done.
  # Place your sound file (e.g. a subtle hum or chime loop) at the configured path.
  thinking_sound:
    enabled: false
    # Play during AI thinking (thought generation)
    on_thinking: true
    # Play during memory recall (recallMemories tool)
    on_recall: true
    # Path to the sound file (wav/ogg/mp3)
    file: "sfx/thinking.wav"
    # Volume 0-100
    volume: 30
    # Fade-in duration in milliseconds
    fade_in_ms: 500
    # Fade-out duration in milliseconds
    fade_out_ms: 800
  # Real-time voice pitch shifting -- allows the AI to shift its voice pitch
  # up or down in semitones via the setVoicePitch tool, like a voice changer.
  pitch_shift:
    # Set to true to allow pitch shifting
    enabled: false
    # Maximum allowed shift in either direction (1-24)
    max_semitones: 12
  # VRChat OSC settings
  osc_ip: "127.0.0.1"
  osc_send_port: 9000
  osc_receive_port: 9001  # Port to receive avatar parameters from VRChat (velocity, grounded, etc.)
  # Seconds to display each chatbox page before advancing
  chatbox_page_delay: 3.0
  # Shared limiter for every VRChat chatbox sender, including model text, music UI, idle banners, and tools.
  # VRChat now allows short bursts instead of a flat timeout. Keep enabled unless you know your setup is exempt.
  chatbox_rate_limiter:
    enabled: true
    capacity: 5
    window_seconds: 5.0
    safety_margin_seconds: 0.1
    # Only consulted when the leaky bucket limiter is disabled.
    legacy_min_interval_seconds: 1.27
  # Idle chatbox banner -- shown in VRChat chatbox when the AI is idle.
  # Displays a customizable banner with up to 3 lines of text, dividers,
  # active session time, and current clock. Respects the 144-char chatbox limit.
  idle_chatbox:
    enabled: false
    # Header text at the top of the banner
    banner: "Gabriel AI"
    # Character used for divider lines (also used in the live music gen display)
    divider: "\u2500"
    # Number of times the divider character is repeated (also used in the live music gen display)
    divider_length: 14
    # Up to 3 lines of text between dividers (empty strings are skipped)
    # NOTE(review): the list items below had no key in front of them, which
    # is invalid inside this mapping. The name `lines` is presumed from the
    # comment above -- confirm against the config loader.
    lines:
      - "VRChat AI Assistant"
      - "Listening for voice"
    # How often (seconds) to refresh the banner (updates clock/active time)
    update_interval: 30
# ══════════════════════════════════════════════════════════
# VRChat API (for avatar switching)
# ══════════════════════════════════════════════════════════
# Required for searchAvatars and switchAvatar tools.
# Credentials are used to authenticate with the VRChat API.
# Session cookies are persisted to data/vrchat_cookies.json.
vrchat_api:
  # Username (or email) of the VRChat account
  username: ""
  # Password of the VRChat account
  password: ""
  # Optional TOTP secret for automatic 2FA. Use the secret key shown when
  # setting up your authenticator app, NOT a 6-digit code.
  # When set, 2FA is handled automatically (requires: pip install pyotp).
  # When empty, auto-2FA is skipped and codes must be provided manually.
  totp_secret: ""
  # Whether the model may edit your VRChat profile bio via the updateStatus
  # tool. With false (default) only status and statusDescription can change.
  allow_bio_edit: false
  # Group the inviteToGroup tool invites people to when asked.
  # Format: grp_00000000-0000-0000-0000-000000000000
  # An empty value disables the inviteToGroup tool. You need invite
  # permissions in the group (usually group owner or roles with that power).
  group_id: ""
  # Folder containing local music files (.mp3, .wav, .ogg, .flac)
  music_dir: "sfx/music"
  # Master switch -- set to false to completely disable the player-following system
  # (removes the tool declarations from Gemini and skips tracker init)
  enabled: true
  # Live YOLO stream + per-setting sliders are in the main WebUI under the Vision tab.
  # YOLOv8n model auto-downloads here if missing
  model_dir: "models/yolov8"
  model_name: "yolov8n.pt"
face_tracker:
  # Face tracking - smoothly looks at faces using yolov8n-face model
  # When AI is speaking: locks onto the closest face
  # When idle: randomly glances at visible faces every 5-10 seconds
  enabled: false
  # NOTE(review): this `enabled` sits at the same indentation as the
  # face-tracking `enabled` a few lines up, so it is a duplicate key in the
  # same mapping. A parent key for the wandering settings appears to be
  # missing here -- confirm against the upstream file.
  # Autonomous wandering using depth estimation for obstacle avoidance
  # AI can toggle this to freely explore VRChat maps while avoiding walls
  enabled: false
  # Depth model: "depth-anything-v2-small" (fast, ~50ms), "depth-anything-v2-base" (balanced),
  #              "dpt-large" (heavy, ~450ms)
  model: "depth-anything-v2-small"
  # Use FP16 on GPU (halves VRAM, ~2x faster, no quality loss)
  fp16: true
  # Remote depth server (offload inference to a dedicated GPU machine)
  # See depth_server/ for the server setup
  depth_server:
    enabled: false
    url: "http://192.168.1.x:8780"
    api_key: "your-secret-key"
  # Screen capture for Gemini Live vision
  # Allows the AI to SEE what's on your screen
  enabled: true
  # Monitor index to capture (0 = all monitors combined, 1 = primary, 2 = secondary, etc.)
  # Run `python -c "import mss; s=mss.mss(); [print(i, m) for i,m in enumerate(s.monitors)]"` to list
  monitor: 1
  # Seconds between screen captures (lower = more responsive but uses more tokens)
  # Note: audio+video sessions are limited to 2 minutes, use context window compression
  # For 3.1 models on free tier: auto-bumped to 2.0s if set below that (token optimization)
  interval: 1.0
  # Max resolution (images are scaled to fit this size, preserving aspect ratio)
  # For 3.1 models: auto-capped at 768 to reduce payload size
  max_size: 1024
  # JPEG quality (1-100, higher = better quality but larger payload)
  # For 3.1 models: auto-capped at 60 to reduce payload size
  quality: 80
  # Media resolution controls how many tokens each image costs in the Live API.
  # Options: "low" (280 tokens), "medium" (560 tokens), "high" (1120 tokens)
  # Auto-defaults to "low" for 3.1 models if not set (critical for free tier 65K TPM limit).
  # For 2.5 models the default is unset (uses API default ~256 tokens with pan & scan).
  # media_resolution: "low"
  # Pause screen capture while AI is speaking or music is playing (saves tokens).
  # Live music (music_gen) is excluded from this pause so the AI can still see reactions.
  # Set to false if you want the AI to always see the screen even while talking.
  pause_on_output: true
  # Pause screen capture when nobody is interacting with the AI.
  # Instead of fully stopping, vision slows down to idle_interval to save tokens
  # while still keeping some awareness. Resumes normal speed when someone speaks
  # or any task becomes active. Works with both auto and Silero VAD modes.
  pause_on_idle: true
  # Seconds between vision frames when idle. Only used when pause_on_idle is true.
  # Lower = more awareness while idle but more tokens, higher = saves tokens.
  idle_interval: 15.0
# ══════════════════════════════════════════════════════════
# Persistent Memory System
# ══════════════════════════════════════════════════════════
  # Enable memory system
  enabled: true
  # Backend: "sqlite" (local file) or "mongo" (MongoDB Atlas)
  backend: "sqlite"
  # SQLite settings (used if backend is "sqlite")
  sqlite_path: "data/gabriel_memories.sqlite"
  # MongoDB settings (used if backend is "mongo")
  # Set GABRIEL_MONGO_URI environment variable, or configure below
  mongo_uri: ""
  mongo_db: ""
  mongo_collection: ""
  # Memory TTL (time-to-live) settings
  quick_note_ttl_hours: 6      # Quick notes auto-delete after 6 hours
  short_term_ttl_days: 7       # Short-term memories auto-delete after 7 days
  # Rate limiting for quick notes (prevents spam)
  note_min_interval_seconds: 120   # Min seconds between quick notes
  dedupe_window_seconds: 300       # Deduplication window
  # How many recent memories to include in the system prompt
  prompt_memory_count: 15
  # RAG System (semantic memory recall via embeddings + vector search)
  # When disabled, uses legacy keyword recall.
  rag_enabled: false
  # RAG provider: "gemini" (cloud, requires MongoDB) or "local" (ChromaDB + LM Studio)
  # "gemini" - uses Gemini embedding API + MongoDB Atlas vector search
  # "local"  - uses local embedding model via LM Studio + ChromaDB vector DB (no cloud needed)
  rag_provider: "gemini"
  # -- Gemini RAG settings (rag_provider: "gemini") --
  # Requires MongoDB backend. Gemini embedding API has 100 RPM limit on free tier.
  embedding_model: "gemini-embedding-001"   # Gemini embedding model
  embedding_dims: 768                       # Output dimensions (lower = faster, max 3072)
  # -- Local RAG settings (rag_provider: "local") --
  # Requires LM Studio running with an embedding model loaded.
  # Works with any backend (sqlite or mongo).
  # See README for setup instructions.
  lm_studio_url: "http://localhost:1234"          # LM Studio server URL
  local_embedding_model: "text-embedding-embeddinggemma-300m-qat"  # Model loaded in LM Studio
  chroma_dir: "data/gabriel_chroma_db"                 # ChromaDB storage directory
  # Vector similarity thresholds (per-provider, since local models score lower)
  vector_min_score_gemini: 0.82               # Gemini embeddings: higher scores, stricter threshold
  vector_min_score_local: 0.55                # Local embeddings (LM Studio): lower scores, looser threshold
  # vector_min_score: 0.82                    # (legacy) single threshold for both, overrides the active provider
# ══════════════════════════════════════════════════════════
# Avatar Emotion/Animation System
# ══════════════════════════════════════════════════════════
  # Enable emotion system
  enabled: false
  # VRChat Avatar ID (optional, for logging/tracking)
  avatar_id: ""
  # Default duration for non-looping animations (seconds)
  default_duration: 3.0
  # How often to switch between talking animations when speaking (seconds)
  talking_switch_interval: 5.0
  # Idle animation - plays when nobody is speaking for a while
  idle_enabled: true               # Enable/disable idle animation
  idle_animation: "sad-idle"       # Animation name to play when idle (must be defined below)
  idle_timeout: 10                 # Seconds of silence before idle animation starts
  # Thinking animation - plays while AI is thinking or recalling memories
  # Set to a defined animation name, or leave empty to disable
  thinking_animation: ""           # Animation name to play when thinking (must be defined below)
  # ── Animations ──
  # Each animation maps to its VRChat OSC parameter path
  # looping: true = stays on until stopAnimation is called
  # auto_talking: true = auto-triggered when AI speaks (not manually callable)
  animations:
    # Example emotion animation
    # sad-idle:
    #   osc_path: "/avatar/parameters/YourParam/sad-idle"
    #   category: "emotion"
    #   looping: true
    # Example talking animation (auto-triggered when AI speaks)
    # talking-1:
    #   osc_path: "/avatar/parameters/YourParam/talking-1"
    #   category: "talking"
    #   looping: true
    #   auto_talking: true
    # Example dance animation
    # dance-1:
    #   osc_path: "/avatar/parameters/YourParam/dance-1"
    #   category: "dance"
    #   looping: true
# ══════════════════════════════════════════════════════════
# OBS Overlay (optional streaming overlay)
# ══════════════════════════════════════════════════════════
# Provides transparent browser source overlays for OBS/streaming.
# When disabled, overlay routes and music broadcast are inactive.
  enabled: false
# ══════════════════════════════════════════════════════════
# Lyria RealTime Music Generation (experimental)
# ══════════════════════════════════════════════════════════
# AI-powered real-time instrumental music generation using Google's Lyria RealTime.
# When enabled, the AI can generate and perform live instrumental music that can
# be steered in real-time (change style, tempo, key, density, brightness, etc.).
# Uses the same Gemini API key. Requires v1alpha API access.
# Output: stereo 48kHz 16-bit PCM via a dedicated audio stream.
# NOTE(review): the settings below had no parent key. `music_gen` is the name
# the screen-capture comments in this file use for live music -- confirm
# against the config loader.
music_gen:
  # Master switch - set to false to completely disable music generation
  # (removes tool declarations from Gemini and skips module init)
  enabled: false
  # Default BPM (60-200). Leave null to let the model decide based on prompts.
  default_bpm: 120
  # Default playback volume (0-200, 100 = normal)
  volume: 80
  # Temperature (0.0-3.0, default 1.1)
  temperature: 1.1
  # Prompt guidance (0.0-6.0, default 4.0). Higher = stricter prompt following.
  guidance: 4.0
  # Mute bass by default (solo instrument feel)
  mute_bass: true
  # Mute drums by default (solo instrument feel)
  mute_drums: true
# ══════════════════════════════════════════════════════════
# Web Search (Jina Reader API)
# ══════════════════════════════════════════════════════════
# Provides webSearch and readWebpage tools via Jina Reader API.
# s.jina.ai for web search, r.jina.ai for URL content extraction.
# Only active on Gemini 3.1 models (2.5 models use built-in Google Search instead).
web_search:
  enabled: false
  # Jina API key (optional) -- raises the rate limit from 20 RPM without a
  # key to 100-1000 RPM with one.
  # Free keys are available at https://jina.ai/reader/
  jina_api_key: ""
# ══════════════════════════════════════════════════════════
# Discord Selfbot Integration
# ══════════════════════════════════════════════════════════
# Uses discord.py-self for a selfbot that has its own Gemini Live
# session and can respond to messages, relay info to VRChat, etc.
# Configuration lives in discord_bot/config.yml (separate file).
discord_bot:
  enabled: false
# ══════════════════════════════════════════════════════════
# Social Server (AI-to-AI Messaging)
# ══════════════════════════════════════════════════════════
# Connect to a ProjectGabriel Social Server to message other people,
# manage friends, and see who's online. Run the server from social_server/.
# See social_server/README.md for setup instructions.
  enabled: false
  # URL of the social server
  server_url: "http://localhost:3000"
  # Your API key (must match a key in the social server's config.yml)
  # Leave blank if the server is in open mode
  api_key: "YOUR_SOCIAL_API_KEY"
  # Password for open mode servers (used to register/login without an API key)
  # Leave blank if using API key auth
  password: ""
  # Your AI's username (must match the username for your key in the server config)
  # In open mode, this is the username your AI will register with
  username: "Gabriel"
  # Short description of your AI shown to other users
  description: "A VRChat AI companion"
  # If true, your AI appears offline to others (won't show in online lists)
  # You can still send/receive messages, just won't appear online
  appear_offline: false
  # How often to send heartbeat pings (seconds)
  heartbeat_interval: 30
  # How often to poll for new messages when WebSocket is unavailable (seconds)
  message_check_interval: 60
  # Seconds to wait after receiving a message before prompting a reply.
  # Messages arrive as context (turn_complete=false). After this delay
  # with no new messages, the AI is nudged to consider replying (turn_complete=true).
  idle_reply_delay: 300
# ══════════════════════════════════════════════════════════
# Plugins
# ══════════════════════════════════════════════════════════
# Drop-in plugin system. Anything in ./plugins/<name>/ with a plugin.yml
# manifest gets auto-loaded on startup. See plugins/README.md for the
# full author guide. Plugins can register Gemini tools, TTS providers,
# STT providers, and subscribe to lifecycle events (startup, shutdown,
# message_in, message_out).
# Per-plugin enable/disable lives in config/tools.yml under the `plugins:`
# block, alongside per-tool toggles. This section only holds runtime
# settings the plugins read via ctx.plugin_config(...).
plugins:
  # Master toggle for the whole plugin system. Set false to skip loading
  # any plugins at all (useful for debugging if a plugin is misbehaving).
  enabled: true
  # Trust mode. When false (default) plugins get a sandboxed view of the
  # host config that hides secrets (gemini api_key, vrchat password,
  # mongo connection string, discord token, etc). Plugins should store
  # their own settings under plugins.<name>.* and read them through
  # ctx.plugin_config(), not by reaching into ctx.config.api_key.
  # Flip this to true if you have an older plugin that needs raw access
  # to those, like the diary plugin which uses the main gemini api_key
  # for its background sub-agent. Only do this for plugins you trust,
  # any loaded plugin will be able to read every secret in this file.
  trusted: false
# Per-tool and per-plugin enable toggles live in config/tools.yml.
# Run the configurator or edit that file directly to hide individual
# tools from Gemini.