---
# ╔══════════════════════════════════════════════════════════╗
# ║ ProjectGabriel Configuration ║
# ╚══════════════════════════════════════════════════════════╝
# Copy this file to config.yml and fill in your values.
# Display name used in the WebUI, console logs, OBS overlays, and chatbox.
app_name: "Gabriel"
# Which brain to run. Two options:
# gemini_live - cloud Gemini Live websocket (the original setup, native voice)
# local - LM Studio for the LLM + Moonshine v2 for STT + your chosen
# external TTS provider (qwen3 / hoppou / chirp3_hd / tiktok / plugin)
# Local mode is fully offline aside from a few short flash-lite sub-agents
# (memory recall, conversation summary). Local mode REQUIRES one of the
# external TTS providers under tts:* to be enabled, since LM Studio has no
# native voice.
backend: "gemini_live"
# Privacy controls. Off by default. See README "Privacy" section.
privacy:
  # When true, every Gemini Live session writes a JSON transcript of the
  # whole conversation (user transcripts, assistant transcripts, tool
  # calls, tool responses) to data/conversations/<timestamp>.json. Off by
  # default. Turn on if you want a personal log to grep / replay later,
  # leave off if you do not want any of it on disk.
  save_conversations: false
# Logging. Controls how chatty the terminal is.
logging:
  # DEBUG / INFO / WARNING / ERROR. Use DEBUG to see everything including
  # third party libs (httpx, discord internals, etc), INFO is the everyday
  # level, WARNING hides per-event status spam.
  level: "INFO"
gemini:
# Primary API key (required) - get from https://aistudio.google.com/apikey
api_key: "YOUR_GEMINI_API_KEY_HERE"
# Backup keys for automatic rotation when rate limited (optional)
# The system will cycle through these when the primary key hits quota limits
backup_keys: []
# - "BACKUP_KEY_1"
# - "BACKUP_KEY_2"
# - "BACKUP_KEY_3"
# Gemini Live model - must support native audio
# Options: "gemini-3.1-flash-live-preview" (latest, lower latency)
# "gemini-2.5-flash-native-audio-preview-09-2025" (legacy)
# "gemini-2.5-flash-native-audio-preview-12-2025" (legacy)
model: "gemini-2.5-flash-native-audio-preview-09-2025"
# System prompt -- select a named prompt from config/prompts/prompts.yml
prompt: "default"
# ── Voice Configuration ──
# Prebuilt voice name. Available voices:
# Puck, Charon, Kore, Fenrir, Aoede, Leda, Orus, Zephyr
voice: "Puck"
# ── Voice Activity Detection (VAD) ──
# Controls how Gemini detects when you start/stop speaking.
#
# Two modes (selected with the `mode` key below):
# 1. Automatic (mode: "auto") - Server-side VAD handles everything.
# Simple, works well for most setups. Just configure sensitivity.
# 2. Manual (mode: "silero") - Client-side VAD using the local Silero model.
# Sends activityStart/activityEnd signals to Gemini manually.
# Recommended for production apps (per Google AI community patterns).
# Gives you full control over turn-taking and prevents echo issues.
# Also gates audio during tool calls and model speech to avoid
# 1007/1008 disconnects from stale audio.
vad:
# VAD mode: "auto" (Gemini server-side) or "silero" (local Silero VAD model)
# auto = Uses Gemini's built-in automatic activity detection. Simple, no extra deps.
# silero = Uses Silero VAD locally for speech detection with activityStart/activityEnd.
# More stable on 3.1 models, prevents stalls from noise. Requires torch.
# Also gates audio during tool calls and model speech to avoid
# 1007/1008 disconnects from stale audio leaking to the server.
mode: "auto"
# Sensitivity for detecting the START of speech (auto mode only)
# Options: START_SENSITIVITY_LOW, START_SENSITIVITY_HIGH
# HIGH = triggers faster but may pick up noise, LOW = more selective
start_of_speech_sensitivity: "START_SENSITIVITY_HIGH"
# Sensitivity for detecting the END of speech (auto mode only)
# Options: END_SENSITIVITY_LOW, END_SENSITIVITY_HIGH
# HIGH = ends speech detection quicker, LOW = waits longer
end_of_speech_sensitivity: "END_SENSITIVITY_HIGH"
# Milliseconds of audio to capture BEFORE speech is detected (context padding)
prefix_padding_ms: 100
# Milliseconds of silence required to consider speech complete
# Used in both auto and silero mode. The shipped value of 200 favours fast
# turn-taking; raise it to around 500 for more natural conversation.
silence_duration_ms: 200
# Speech probability threshold for Silero VAD (silero mode only)
# Silero outputs a 0.0-1.0 probability per audio chunk.
# Chunks above this threshold are considered speech.
# Default 0.5 is a good starting point. Lower = more sensitive, higher = stricter.
# silero_threshold: 0.5
# ── Generation Parameters ──
temperature: 1.0 # 0.0-2.0, controls randomness (default: 1.0, higher = more creative)
# ── Thinking Configuration ──
# Enables the model to "think" before responding (inner monologue).
# Requires a model that supports thinking (e.g. gemini-2.5-flash-native-audio-preview-12-2025).
# Dynamic thinking is enabled by default on supported models.
thinking:
# For 2.5 models: thinking token budget - guides how many tokens the model uses to think.
# Higher = more thorough but slower responses. Set to 0 to disable thinking.
# null = use model default (dynamic thinking)
budget: null
# For 3.1 models: thinking level controls thinking depth.
# Options: "minimal" (lowest latency), "low", "medium", "high"
# Default is "minimal" for 3.1 models. Ignored for 2.5 models.
# level: "low"
# Whether to include thought summaries in server responses.
# Useful for debugging what the model is reasoning about.
include_thoughts: false
# ── Advanced Live Features (require v1alpha API, 2.5 models only, auto-fallback if unsupported) ──
# These are NOT supported on 3.1 models and will be automatically skipped.
# enable_affective_dialog: true # Emotional/expressive responses
# proactivity: null # Model proactivity level (float), null = default
# ── Google Search Grounding ──
# Enable Google Search tool for the model to look up current information.
# Auto-detection: if not set, enabled for 2.5 models, disabled for 3.1 (quota issues).
# Set explicitly to override: true = always on, false = always off.
# google_search: true
# ── Context Window Compression ──
# Prevents session termination when context gets too large.
# Without compression: audio-only 15 min, audio+video 2 min.
# With compression: unlimited session length.
# NOTE: These settings are fully disabled when custom_compression is enabled.
# No built-in compression config is sent to Gemini in that case.
context_window_compression:
enabled: true
# Token count that triggers compression (null = 80% of model's 128k context window)
trigger_tokens: 50000
# Tokens to keep after compression (null = trigger_tokens / 2)
target_tokens: 25000
# ── Custom Context Compression ──
# When enabled, replaces Gemini's built-in sliding window compression with a
# smarter approach: when tokens near the threshold, a lightweight model summarizes
# the conversation, clears the session, and seeds the summary as initial context.
# This avoids repeated 1007 errors that Gemini's built-in compression can cause.
custom_compression:
enabled: false
# Token count that triggers the summarize-and-reconnect cycle
trigger_tokens: 100000
# Model used for summarization (lightweight, fast)
model: "gemini-3.1-flash-lite"
# ── Session Resumption ──
# Controls how aggressively session handles are cleared on errors and
# how many recent messages are replayed as context on fresh reconnects.
session:
# Number of consecutive errors (e.g. 1007) before clearing session handle and starting fresh.
# Lower = faster recovery but discards handle sooner. Default: 1.
error_threshold: 1
# Number of recent user/assistant messages to replay as context on fresh reconnect.
# These are sent to the new session so the model knows what was being discussed.
replay_messages: 10
# ══════════════════════════════════════════════════════════
# Local backend (LM Studio + Moonshine + external TTS)
# ══════════════════════════════════════════════════════════
# Only used when backend: local at the top of this file. Otherwise ignored.
# Requirements:
# 1. LM Studio running with a chat model loaded and server enabled
# (Developer tab -> Start Server). default endpoint is the one below.
# 2. One of the external TTS providers below (tts.qwen3 / hoppou /
# chirp3_hd / tiktok or a plugin) enabled. local mode refuses to
# start without a TTS provider since the LLM has no voice of its own.
# 3. Moonshine Voice installed (pip install moonshine-voice). First
# run downloads the chosen streaming arch into the user cache.
local:
llm:
# OpenAI compatible endpoint. LM Studio defaults to localhost:1234.
base_url: "http://localhost:1234/v1"
# Model identifier as shown in LM Studio. The default placeholder works
# because LM Studio routes to whatever is loaded, but be explicit when
# you can.
model: "local-model"
api_key: "lm-studio"
temperature: 0.8
top_p: 0.95
max_tokens: 1024
# Rolling chat history kept in context across turns. Higher = more
# memory but more tokens per request.
history_messages: 30
# Per-request timeout in seconds.
request_timeout: 120
# Safety cap on tool call -> response -> tool call -> response loops in
# a single user turn. If the model wants more iterations than this we
# stop and let it speak.
max_tool_iterations: 6
vision:
# When true, captures the screen and attaches an image to every user
# turn. Requires a multimodal model loaded in LM Studio (eg qwen2.5-vl).
enabled: false
# Longest edge of the resized image. Smaller = faster, fewer tokens.
max_size: 768
# JPEG quality (1-100) for the encoded image.
quality: 70
stt:
# Moonshine streaming model arch. Picks weights via moonshine_voice.
# Options:
# tiny_streaming 34M params, ~69ms TTFT, lowest accuracy
# small_streaming 123M params, ~165ms TTFT, recommended default
# medium_streaming 245M params, ~269ms TTFT, best accuracy
model: "small_streaming"
# Language tag passed to moonshine_voice.download_model. Only English
# has full streaming weights; other languages fall back to base v2.
language: "en"
# Drop any speech segment shorter than this. Filters out keyboard
# clicks, coughs, brief mouth noises.
min_speech_ms: 400
# Hard cap on a single utterance so a noisy room can't grow the buffer
# forever. The utterance is force-finalised at this length.
max_utterance_ms: 30000
# Keep this much audio from BEFORE VAD triggered so we never clip the
# first phoneme. 200-400ms is the sweet spot.
pre_roll_ms: 300
# Silero VAD speech-probability threshold (0.0-1.0). Lower = more
# sensitive (picks up quieter speech, more false triggers). Higher =
# stricter (might miss soft talkers). Defaults to gemini.vad.silero_threshold
# if unset, which itself defaults to 0.5. Try 0.3 if Gabriel keeps
# missing your voice, 0.6+ if the mic triggers on background noise.
vad_threshold: 0.4
# How many ms of silence before we close the utterance and send it to
# transcription. Lower = snappier turn-taking but cuts off mid-sentence
# pauses. Higher = more patient. Falls back to gemini.vad.silence_duration_ms.
silence_ms: 600
# ══════════════════════════════════════════════════════════
# TTS Provider
# ══════════════════════════════════════════════════════════
# Choose which voice to use for AI speech output.
#   "gemini"    = use Gemini Live's native audio (default)
#   "qwen3"     = discard Gemini audio, use Qwen3 TTS server instead
#   "hoppou"    = discard Gemini audio, use Hoppou AI cloud TTS
#   "chirp3_hd" = discard Gemini audio, use Google Cloud Chirp 3: HD streaming TTS
#   "tiktok"    = discard Gemini audio, use TikTok's TTS API (free, no API key needed)
tts:
  provider: "gemini"

  # Providers the AI can switch to at runtime via the switchTTSProvider tool.
  # Must be a list of provider names: "gemini", "qwen3", "hoppou", "chirp3_hd", "tiktok"
  switchable_providers: ["gemini"]

  # Qwen3 TTS server settings (only used when provider is "qwen3")
  qwen3:
    # URL of your Qwen3 TTS server
    base_url: "http://localhost:7860"
    # TTS mode: "voice_clone", "custom", or "voice_design"
    mode: "voice_clone"
    # Language for synthesis
    language: "English"

    # ── Voice Clone settings ──
    # Use a preset reference voice (from server's preset list)
    ref_preset: ""
    # OR provide a local reference audio file + transcript
    ref_audio: ""
    ref_text: ""
    # x-vector only (faster) vs full clone (more accurate)
    xvec_only: true

    # ── Custom Voice settings ──
    # Speaker name (for "custom" mode, requires CustomVoice model)
    speaker: ""

    # ── Voice Design / Custom instruct ──
    # Text description of desired voice (for "voice_design" or "custom" mode)
    instruct: ""

    # ── Generation parameters ──
    chunk_size: 8  # Streaming chunk size (tokens per chunk)
    temperature: 0.9
    top_k: 50
    repetition_penalty: 1.05

  # Hoppou AI cloud TTS settings (only used when provider is "hoppou")
  hoppou:
    # API endpoint
    api_url: "https://api.hoppou.ai/tts"
    # API key (get one at https://hoppou.ai/api-portal/)
    api_key: ""
    # Voice preset name
    voice: "alba"
    # Model name
    model: "tts-1"

  # Google Cloud Chirp 3: HD TTS settings (only used when provider is "chirp3_hd")
  # Uses the streaming API for low-latency synthesis with Google's latest HD voices.
  # Requires google-cloud-texttospeech package.
  chirp3_hd:
    # API key for Google Cloud TTS (get from https://console.cloud.google.com/apis/credentials)
    # Leave empty to use Application Default Credentials (ADC) instead
    api_key: ""
    # Backup keys for automatic rotation on rate limit (429) errors
    # Free tier: ~500k chars/month. When exhausted, rotates to next key automatically.
    backup_keys: []
    # - "BACKUP_CLOUD_KEY_1"
    # - "BACKUP_CLOUD_KEY_2"
    # Voice name -- see https://docs.cloud.google.com/text-to-speech/docs/chirp3-hd
    # Available: Achernar, Achird, Algenib, Algieba, Alnilam, Aoede, Autonoe,
    # Callirrhoe, Charon, Despina, Enceladus, Erinome, Fenrir, Gacrux, Iapetus,
    # Kore, Laomedeia, Leda, Orus, Puck, Pulcherrima, Rasalgethi, Sadachbia,
    # Sadaltager, Schedar, Sulafat, Umbriel, Vindemiatrix, Zephyr, Zubenelgenubi
    voice: "Kore"
    # Language code (e.g. en-US, ja-JP, de-DE, fr-FR)
    language_code: "en-US"
    # Speaking rate (0.25 to 2.0, 1.0 = normal)
    speaking_rate: 1.0

  # TikTok TTS settings (only used when provider is "tiktok")
  # Uses the free community Weilbyte TikTok TTS proxy (no auth needed).
  # See: https://github.com/Weilbyte/tiktok-tts
  tiktok:
    # Voice code to use. Full list: https://github.com/oscie57/tiktok-voice/wiki/Voice-Codes
    #
    # Popular voices:
    #   en_us_001            English US Female 1    en_us_006            English US Male 1
    #   en_us_002            English US Female 2    en_us_007            English US Male 2
    #   en_us_009            English US Male 3      en_us_010            English US Male 4
    #   en_uk_001            English UK Male 1      en_uk_003            English UK Male 2
    #   en_au_001            English AU Female      en_au_002            English AU Male
    #   en_male_narration    Narrator               en_male_funny        Wacky
    #   en_female_emotional  Peaceful               en_male_cody         Serious
    #
    # Disney voices:
    #   en_us_ghostface        Ghost Face      en_us_chewbacca      Chewbacca
    #   en_us_c3po             C3PO            en_us_stitch         Stitch
    #   en_us_rocket           Rocket          en_us_stormtrooper   Stormtrooper
    #   en_female_madam_leota  Madame Leota    en_male_ghosthost    Ghost Host
    #   en_male_pirate         Pirate
    #
    # Singing voices:
    #   en_female_f08_salut_damour        Alto    en_male_m03_lobby    Tenor
    #   en_male_m03_sunshine_soon         Sunshine Soon
    #   en_female_f08_warmy_breeze        Warmy Breeze
    #   en_female_ht_f08_glorious         Glorious
    #   en_male_sing_funny_it_goes_up     It Goes Up
    #   en_male_m2_xhxs_m03_silly         Chipmunk
    #   en_female_ht_f08_wonderful_world  Dramatic
    #
    # Other languages:
    #   fr_001 / fr_002  French     de_001 / de_002  German
    #   es_002           Spanish    es_mx_002        Spanish MX
    #   br_001 / br_003 / br_004 / br_005  Portuguese BR
    #   jp_001 / jp_003 / jp_005 / jp_006  Japanese
    #   kr_002 / kr_003 / kr_004           Korean
    #   id_001                             Indonesian
    voice: "en_us_001"
audio:
# Audio device indices - null uses system default
# Run `python -c "import pyaudio; p=pyaudio.PyAudio(); [print(i, p.get_device_info_by_index(i)['name']) for i in range(p.get_device_count())]"` to list devices
input_device: null
output_device: null
# Sample rates (don't change unless you know what you're doing)
send_sample_rate: 16000
receive_sample_rate: 24000
chunk_size: 1024
# Thinking sound effect -- plays a looping ambient sound while the AI is thinking
# or recalling memories. Fades in when thinking starts, fades out when done.
# Place your sound file (e.g. a subtle hum or chime loop) at the configured path.
thinking_sound:
enabled: false
on_thinking: true # Play during AI thinking (thought generation)
on_recall: true # Play during memory recall (recallMemories tool)
file: "sfx/thinking.wav" # Path to the sound file (wav/ogg/mp3)
volume: 30 # Volume 0-100
fade_in_ms: 500 # Fade-in duration in milliseconds
fade_out_ms: 800 # Fade-out duration in milliseconds
# Real-time voice pitch shifting -- allows the AI to shift its voice pitch
# up or down in semitones via the setVoicePitch tool, like a voice changer.
pitch_shift:
enabled: false # Set to true to allow pitch shifting
max_semitones: 12 # Maximum allowed shift in either direction (1-24)
vrchat:
# VRChat OSC settings
osc_ip: "127.0.0.1"
osc_send_port: 9000
osc_receive_port: 9001 # Port to receive avatar parameters from VRChat (velocity, grounded, etc.)
# Seconds to display each chatbox page before advancing
chatbox_page_delay: 3.0
# Shared limiter for every VRChat chatbox sender, including model text, music UI, idle banners, and tools.
# VRChat now allows short bursts instead of a flat timeout. Keep enabled unless you know your setup is exempt.
chatbox_rate_limiter:
enabled: true
capacity: 5
window_seconds: 5.0
safety_margin_seconds: 0.1
# Used only when the leaky bucket limiter is disabled.
legacy_min_interval_seconds: 1.27
# Idle chatbox banner -- shown in VRChat chatbox when the AI is idle.
# Displays a customizable banner with up to 3 lines of text, dividers,
# active session time, and current clock. Respects the 144-char chatbox limit.
idle_chatbox:
enabled: false
# Header text at the top of the banner
banner: "Gabriel AI"
# Character used for divider lines (also used in the live music gen display)
divider: "\u2500"
# Number of times the divider character is repeated (also used in the live music gen display)
divider_length: 14
# Up to 3 lines of text between dividers (empty strings are skipped)
lines:
- "VRChat AI Assistant"
- "Listening for voice"
- ""
# How often (seconds) to refresh the banner (updates clock/active time)
update_interval: 30
# ══════════════════════════════════════════════════════════
# VRChat API (for avatar switching)
# ══════════════════════════════════════════════════════════
# Required for searchAvatars and switchAvatar tools.
# Credentials are used to authenticate with the VRChat API.
# Session cookies are persisted to data/vrchat_cookies.json.
vrchat_api:
  # VRChat account username (or email)
  username: ""
  # VRChat account password
  password: ""
  # TOTP secret for automatic 2FA (optional)
  # This is the secret key from your authenticator app setup (NOT the 6-digit code).
  # If set, 2FA is handled automatically. Requires: pip install pyotp
  # Leave empty to skip auto-2FA (you'll need to manually provide codes).
  totp_secret: ""
  # Allow the model to edit your VRChat profile bio via updateStatus tool.
  # When false (default), only status and statusDescription can be changed.
  allow_bio_edit: false
  # Group ID for the inviteToGroup tool. The model can invite people to this
  # group when asked. Format: grp_00000000-0000-0000-0000-000000000000
  # Leave empty to disable the inviteToGroup tool. You must have invite
  # permissions in the group (usually group owner or roles with that power).
  group_id: ""
music:
  # Folder containing local music files (.mp3, .wav, .ogg, .flac)
  music_dir: "sfx/music"
yolo:
# Master switch -- set to false to completely disable the player-following system
# (removes the tool declarations from Gemini and skips tracker init)
enabled: true
# Live YOLO stream + per-setting sliders are in the main WebUI under the Vision tab.
# YOLOv8n model auto-downloads here if missing
model_dir: "models/yolov8"
model_name: "yolov8n.pt"
face_tracker:
# Face tracking - smoothly looks at faces using yolov8n-face model
# When AI is speaking: locks onto the closest face
# When idle: randomly glances at visible faces every 5-10 seconds
enabled: false
wanderer:
# Autonomous wandering using depth estimation for obstacle avoidance
# AI can toggle this to freely explore VRChat maps while avoiding walls
enabled: false
# Depth model: "depth-anything-v2-small" (fast, ~50ms), "depth-anything-v2-base" (balanced),
# "dpt-large" (heavy, ~450ms)
model: "depth-anything-v2-small"
# Use FP16 on GPU (halves VRAM, ~2x faster, no quality loss)
fp16: true
# Remote depth server (offload inference to a dedicated GPU machine)
# See depth_server/ for the server setup
depth_server:
enabled: false
url: "http://192.168.1.x:8780"
api_key: "your-secret-key"
vision:
  # Screen capture for Gemini Live vision
  # Allows the AI to SEE what's on your screen
  enabled: true
  # Monitor index to capture (0 = all monitors combined, 1 = primary, 2 = secondary, etc.)
  # Run `python -c "import mss; s=mss.mss(); [print(i, m) for i,m in enumerate(s.monitors)]"` to list
  monitor: 1
  # Seconds between screen captures (lower = more responsive but uses more tokens)
  # Note: audio+video sessions are limited to 2 minutes, use context window compression
  # For 3.1 models on free tier: auto-bumped to 2.0s if set below that (token optimization)
  interval: 1.0
  # Max resolution (images are scaled to fit this size, preserving aspect ratio)
  # For 3.1 models: auto-capped at 768 to reduce payload size
  max_size: 1024
  # JPEG quality (1-100, higher = better quality but larger payload)
  # For 3.1 models: auto-capped at 60 to reduce payload size
  quality: 80
  # Media resolution controls how many tokens each image costs in the Live API.
  # Options: "low" (280 tokens), "medium" (560 tokens), "high" (1120 tokens)
  # Auto-defaults to "low" for 3.1 models if not set (critical for free tier 65K TPM limit).
  # For 2.5 models the default is unset (uses API default ~256 tokens with pan & scan).
  # media_resolution: "low"
  # Pause screen capture while AI is speaking or music is playing (saves tokens).
  # Live music (music_gen) is excluded from this pause so the AI can still see reactions.
  # Set to false if you want the AI to always see the screen even while talking.
  pause_on_output: true
  # Throttle screen capture when nobody is interacting with the AI.
  # Instead of fully stopping, vision slows down to idle_interval to save tokens
  # while still keeping some awareness. Resumes normal speed when someone speaks
  # or any task becomes active. Works with both auto and Silero VAD modes.
  pause_on_idle: true
  # Seconds between vision frames when idle. Only used when pause_on_idle is true.
  # Lower = more awareness while idle but more tokens, higher = saves tokens.
  idle_interval: 15.0
# ══════════════════════════════════════════════════════════
# Persistent Memory System
# ══════════════════════════════════════════════════════════
memory:
  # Enable memory system
  enabled: true
  # Backend: "sqlite" (local file) or "mongo" (MongoDB Atlas)
  backend: "sqlite"
  # SQLite settings (used if backend is "sqlite")
  sqlite_path: "data/gabriel_memories.sqlite"
  # MongoDB settings (used if backend is "mongo")
  # Set GABRIEL_MONGO_URI environment variable, or configure below
  mongo_uri: ""
  mongo_db: ""
  mongo_collection: ""

  # Memory TTL (time-to-live) settings
  quick_note_ttl_hours: 6  # Quick notes auto-delete after 6 hours
  short_term_ttl_days: 7  # Short-term memories auto-delete after 7 days

  # Rate limiting for quick notes (prevents spam)
  note_min_interval_seconds: 120  # Min seconds between quick notes
  dedupe_window_seconds: 300  # Deduplication window

  # How many recent memories to include in the system prompt
  prompt_memory_count: 15

  # RAG System (semantic memory recall via embeddings + vector search)
  # When disabled, uses legacy keyword recall.
  rag_enabled: false
  # RAG provider: "gemini" (cloud, requires MongoDB) or "local" (ChromaDB + LM Studio)
  #   "gemini" - uses Gemini embedding API + MongoDB Atlas vector search
  #   "local"  - uses local embedding model via LM Studio + ChromaDB vector DB (no cloud needed)
  rag_provider: "gemini"

  # -- Gemini RAG settings (rag_provider: "gemini") --
  # Requires MongoDB backend. Gemini embedding API has 100 RPM limit on free tier.
  embedding_model: "gemini-embedding-001"  # Gemini embedding model
  embedding_dims: 768  # Output dimensions (lower = faster, max 3072)

  # -- Local RAG settings (rag_provider: "local") --
  # Requires LM Studio running with an embedding model loaded.
  # Works with any backend (sqlite or mongo).
  # See README for setup instructions.
  lm_studio_url: "http://localhost:1234"  # LM Studio server URL
  local_embedding_model: "text-embedding-embeddinggemma-300m-qat"  # Model loaded in LM Studio
  chroma_dir: "data/gabriel_chroma_db"  # ChromaDB storage directory

  # Vector similarity thresholds (per-provider, since local models score lower)
  vector_min_score_gemini: 0.82  # Gemini embeddings: higher scores, stricter threshold
  vector_min_score_local: 0.55  # Local embeddings (LM Studio): lower scores, looser threshold
  # vector_min_score: 0.82  # (legacy) single threshold for both, overrides the active provider
# ══════════════════════════════════════════════════════════
# Avatar Emotion/Animation System
# ══════════════════════════════════════════════════════════
emotions:
  # Enable emotion system
  enabled: false
  # VRChat Avatar ID (optional, for logging/tracking)
  avatar_id: ""
  # Default duration for non-looping animations (seconds)
  default_duration: 3.0
  # How often to switch between talking animations when speaking (seconds)
  talking_switch_interval: 5.0

  # Idle animation - plays when nobody is speaking for a while
  idle_enabled: true  # Enable/disable idle animation
  idle_animation: "sad-idle"  # Animation name to play when idle (must be defined below)
  idle_timeout: 10  # Seconds of silence before idle animation starts

  # Thinking animation - plays while AI is thinking or recalling memories
  # Set to a defined animation name, or leave empty to disable
  thinking_animation: ""  # Animation name to play when thinking (must be defined below)

  # ── Animations ──
  # Each animation maps to its VRChat OSC parameter path
  #   looping: true      = stays on until stopAnimation is called
  #   auto_talking: true = auto-triggered when AI speaks (not manually callable)
  # Every entry below is a commented-out example; uncomment and adapt the
  # ones you need.
  animations:
    # Example emotion animation
    # sad-idle:
    #   osc_path: "/avatar/parameters/YourParam/sad-idle"
    #   category: "emotion"
    #   looping: true

    # Example talking animation (auto-triggered when AI speaks)
    # talking-1:
    #   osc_path: "/avatar/parameters/YourParam/talking-1"
    #   category: "talking"
    #   looping: true
    #   auto_talking: true

    # Example dance animation
    # dance-1:
    #   osc_path: "/avatar/parameters/YourParam/dance-1"
    #   category: "dance"
    #   looping: true
# ══════════════════════════════════════════════════════════
# OBS Overlay (optional streaming overlay)
# ══════════════════════════════════════════════════════════
# Provides transparent browser source overlays for OBS/streaming.
# When disabled, overlay routes and music broadcast are inactive.
obs:
  enabled: false
# ══════════════════════════════════════════════════════════
# Lyria RealTime Music Generation (experimental)
# ══════════════════════════════════════════════════════════
# AI-powered real-time instrumental music generation using Google's Lyria RealTime.
# When enabled, the AI can generate and perform live instrumental music that can
# be steered in real-time (change style, tempo, key, density, brightness, etc.).
# Uses the same Gemini API key. Requires v1alpha API access.
# Output: stereo 48kHz 16-bit PCM via a dedicated audio stream.
music_gen:
  # Master switch - set to false to completely disable music generation
  # (removes tool declarations from Gemini and skips module init)
  enabled: false
  # Default BPM (60-200). Leave null to let the model decide based on prompts.
  default_bpm: 120
  # Default playback volume (0-200, 100 = normal)
  volume: 80
  # Temperature (0.0-3.0, default 1.1)
  temperature: 1.1
  # Prompt guidance (0.0-6.0, default 4.0). Higher = stricter prompt following.
  guidance: 4.0
  # Mute bass by default (solo instrument feel)
  mute_bass: true
  # Mute drums by default (solo instrument feel)
  mute_drums: true
# ══════════════════════════════════════════════════════════
# Web Search (Jina Reader API)
# ══════════════════════════════════════════════════════════
# Provides webSearch and readWebpage tools via Jina Reader API.
# s.jina.ai for web search, r.jina.ai for URL content extraction.
# Only active on Gemini 3.1 models (2.5 models use built-in Google Search instead).
web_search:
  enabled: false
  # Optional Jina API key for higher rate limits
  # Free without key: 20 RPM. With key: 100-1000 RPM.
  # Get a free key at https://jina.ai/reader/
  jina_api_key: ""
# ══════════════════════════════════════════════════════════
# Discord Selfbot Integration
# ══════════════════════════════════════════════════════════
# Uses discord.py-self for a selfbot that has its own Gemini Live
# session and can respond to messages, relay info to VRChat, etc.
# Configuration lives in discord_bot/config.yml (separate file).
discord_bot:
  enabled: false
# ══════════════════════════════════════════════════════════
# Social Server (AI-to-AI Messaging)
# ══════════════════════════════════════════════════════════
# Connect to a ProjectGabriel Social Server to message other people,
# manage friends, and see who's online. Run the server from social_server/.
# See social_server/README.md for setup instructions.
social:
  enabled: false
  # URL of the social server
  server_url: "http://localhost:3000"
  # Your API key (must match a key in the social server's config.yml)
  # Leave blank if the server is in open mode
  api_key: "YOUR_SOCIAL_API_KEY"
  # Password for open mode servers (used to register/login without an API key)
  # Leave blank if using API key auth
  password: ""
  # Your AI's username (must match the username for your key in the server config)
  # In open mode, this is the username your AI will register with
  username: "Gabriel"
  # Short description of your AI shown to other users
  description: "A VRChat AI companion"
  # If true, your AI appears offline to others (won't show in online lists)
  # You can still send/receive messages, just won't appear online
  appear_offline: false
  # How often to send heartbeat pings (seconds)
  heartbeat_interval: 30
  # How often to poll for new messages when WebSocket is unavailable (seconds)
  message_check_interval: 60
  # Seconds to wait after receiving a message before prompting a reply.
  # Messages arrive as context (turn_complete=false). After this delay
  # with no new messages, the AI is nudged to consider replying (turn_complete=true).
  idle_reply_delay: 300
# ══════════════════════════════════════════════════════════
# Plugins
# ══════════════════════════════════════════════════════════
# Drop-in plugin system. Anything in ./plugins/<name>/ with a plugin.yml
# manifest gets auto-loaded on startup. See plugins/README.md for the
# full author guide. Plugins can register Gemini tools, TTS providers,
# STT providers, and subscribe to lifecycle events (startup, shutdown,
# message_in, message_out).
#
# Per-plugin enable/disable lives in config/tools.yml under the `plugins:`
# block, alongside per-tool toggles. This section only holds runtime
# settings the plugins read via ctx.plugin_config(...).
plugins:
  # Master toggle for the whole plugin system. Set false to skip loading
  # any plugins at all (useful for debugging if a plugin is misbehaving).
  enabled: true
  # Trust mode. When false (default) plugins get a sandboxed view of the
  # host config that hides secrets (gemini api_key, vrchat password,
  # mongo connection string, discord token, etc). Plugins should store
  # their own settings under plugins.<name>.* and read them through
  # ctx.plugin_config(), not by reaching into ctx.config.api_key.
  #
  # Flip this to true if you have an older plugin that needs raw access
  # to those, like the diary plugin which uses the main gemini api_key
  # for its background sub-agent. Only do this for plugins you trust,
  # any loaded plugin will be able to read every secret in this file.
  trusted: false

# Per-tool and per-plugin enable toggles live in config/tools.yml.
# Run the configurator or edit that file directly to hide individual
# tools from Gemini.