Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 38c5235

Browse files
authored
[7/n] Demo for realtime (openai#1074)
Mostly working demo. Plays audio, shows event log. --- [//]: # (BEGIN SAPLING FOOTER) * openai#1076 * __->__ openai#1074
1 parent 3e0122c commit 38c5235

File tree

4 files changed

+380
-2
lines changed

4 files changed

+380
-2
lines changed

examples/realtime/demo.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import asyncio
2+
import os
3+
import sys
4+
from typing import TYPE_CHECKING
5+
6+
import numpy as np
7+
8+
# Add the current directory to path so we can import ui
9+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
10+
11+
from agents import function_tool
12+
from agents.realtime import RealtimeAgent, RealtimeSession, RealtimeSessionEvent
13+
14+
if TYPE_CHECKING:
15+
from .ui import AppUI
16+
else:
17+
# Try both import styles
18+
try:
19+
# Try relative import first (when used as a package)
20+
from .ui import AppUI
21+
except ImportError:
22+
# Fall back to direct import (when run as a script)
23+
from ui import AppUI
24+
25+
26+
@function_tool
27+
def get_weather(city: str) -> str:
    """Get the weather in a city."""
    # NOTE(review): the docstring above is kept verbatim because the
    # `function_tool` decorator presumably uses it as the tool description —
    # confirm before rewording.
    # Demo stub: the reply is a canned string; no weather service is queried.
    forecast = f"The weather in {city} is sunny."
    return forecast
30+
31+
32+
# The one agent this demo talks to. It is handed to RealtimeSession by
# Example.__init__ and has `get_weather` as its only tool.
agent = RealtimeAgent(
    tools=[get_weather],
    instructions="You always greet the user with 'Top of the morning to you'.",
    name="Assistant",
)
37+
38+
39+
class Example:
    """Glue between the realtime session and the terminal UI.

    Holds one ``RealtimeSession`` for the module-level ``agent`` and one
    ``AppUI``. Audio recorded by the UI is forwarded to the session, and
    events emitted by the session are rendered back into the UI.
    """

    def __init__(self) -> None:
        """Create the session and the UI and register the audio callback."""
        self.session = RealtimeSession(agent)

        ui = AppUI()
        ui.connected = asyncio.Event()
        ui.last_audio_item_id = None
        # Recorded microphone chunks are delivered to on_audio_recorded.
        ui.set_audio_callback(self.on_audio_recorded)
        self.ui = ui

    async def run(self) -> None:
        """Connect the session, mark the UI as connected, then run the UI."""
        session, ui = self.session, self.ui
        session.add_listener(self.on_event)
        await session.connect()
        ui.set_is_connected(True)
        await ui.run_async()

    async def on_audio_recorded(self, audio_bytes: bytes) -> None:
        """Forward one chunk of recorded audio to the session.

        Any failure while sending is written to the UI event log instead of
        being raised to the caller.
        """
        try:
            await self.session.send_audio(audio_bytes)
        except Exception as e:
            self.ui.log_message(f"Error sending audio: {e}")

    async def on_event(self, event: RealtimeSessionEvent) -> None:
        """Render a session event in the UI.

        Lifecycle, tool, audio-state and error events become transcript
        lines; ``audio`` events are played back; history events are ignored;
        raw transport events and unrecognised types go to the event log.
        """
        try:
            kind = event.type
            # Text destined for the transcript pane, when this event has any.
            transcript = None

            if kind == "agent_start":
                transcript = f"Agent started: {event.agent.name}"
            elif kind == "agent_end":
                transcript = f"Agent ended: {event.agent.name}"
            elif kind == "handoff":
                transcript = f"Handoff from {event.from_agent.name} to {event.to_agent.name}"
            elif kind == "tool_start":
                transcript = f"Tool started: {event.tool.name}"
            elif kind == "tool_end":
                transcript = f"Tool ended: {event.tool.name}; output: {event.output}"
            elif kind == "audio_end":
                transcript = "Audio ended"
            elif kind == "audio":
                # The payload bytes are reinterpreted as int16 samples for playback.
                samples = np.frombuffer(event.audio.data, dtype=np.int16)
                self.ui.play_audio(samples)
            elif kind == "audio_interrupted":
                transcript = "Audio interrupted"
            elif kind == "error":
                transcript = f"Error: {event.error}"
            elif kind in ("history_updated", "history_added"):
                # History changes are deliberately not displayed.
                pass
            elif kind == "raw_transport_event":
                self.ui.log_message(f"Raw transport event: {event.data}")
            else:
                self.ui.log_message(f"Unknown event type: {kind}")

            if transcript is not None:
                self.ui.add_transcript(transcript)
        except Exception as e:
            # This can happen if the UI has already exited
            self.ui.log_message(f"Event handling error: {str(e)}")
97+
98+
99+
async def main() -> None:
    """Build the demo inside the running event loop and run it until the UI exits."""
    # Example() and the AppUI it owns create asyncio.Event objects. On
    # Python < 3.10 an Event binds to the loop that is current when it is
    # constructed, which is not the loop asyncio.run() creates afterwards;
    # waiting on it then fails with "attached to a different loop".
    # Constructing everything here keeps the events on the running loop.
    example = Example()
    await example.run()


if __name__ == "__main__":
    asyncio.run(main())

examples/realtime/ui.py

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
from __future__ import annotations
2+
3+
import asyncio
4+
from collections.abc import Coroutine
5+
from typing import Any, Callable
6+
7+
import numpy as np
8+
import numpy.typing as npt
9+
import sounddevice as sd
10+
from textual import events
11+
from textual.app import App, ComposeResult
12+
from textual.containers import Container, Horizontal
13+
from textual.reactive import reactive
14+
from textual.widgets import RichLog, Static
15+
from typing_extensions import override
16+
17+
# Audio settings shared by the playback stream and the microphone stream.
CHUNK_LENGTH_S = 0.05  # seconds of audio per microphone read (50ms)
SAMPLE_RATE = 24_000  # samples per second
FORMAT = np.int16  # sample dtype passed to the sounddevice streams
CHANNELS = 1  # channel count passed to the sounddevice streams
21+
22+
23+
class Header(Static):
    """Banner widget that always renders the demo title."""

    @override
    def render(self) -> str:
        # The title is a constant; nothing stored on the widget is consulted.
        title = "Realtime Demo"
        return title
29+
30+
31+
class AudioStatusIndicator(Static):
    """Status line telling the user whether the conversation has started."""

    # Reactive flag; the text produced by render() depends on it.
    is_recording = reactive(False)

    @override
    def render(self) -> str:
        if self.is_recording:
            return "🔴 Conversation started."
        return "⚪ Press SPACE to start the conversation (q to quit)"
44+
45+
46+
class AppUI(App[None]):
    """Textual terminal UI for the realtime demo.

    Shows a title banner, a recording-status line, a transcript pane and a
    raw event log. It also owns the audio output stream used for playback
    and a background worker that reads the microphone and hands each chunk
    to the registered audio callback.
    """

    CSS = """
        Screen {
            background: #1a1b26; /* Dark blue-grey background */
        }

        Container {
            border: double rgb(91, 164, 91);
        }

        #input-container {
            height: 5; /* Explicit height for input container */
            margin: 1 1;
            padding: 1 2;
        }

        #bottom-pane {
            width: 100%;
            height: 82%; /* Reduced to make room for session display */
            border: round rgb(205, 133, 63);
        }

        #status-indicator {
            height: 3;
            content-align: center middle;
            background: #2a2b36;
            border: solid rgb(91, 164, 91);
            margin: 1 1;
        }

        #session-display {
            height: 3;
            content-align: center middle;
            background: #2a2b36;
            border: solid rgb(91, 164, 91);
            margin: 1 1;
        }

        #transcripts {
            width: 50%;
            height: 100%;
            border-right: solid rgb(91, 164, 91);
        }

        #transcripts-header {
            height: 2;
            background: #2a2b36;
            content-align: center middle;
            border-bottom: solid rgb(91, 164, 91);
        }

        #transcripts-content {
            height: 100%;
        }

        #event-log {
            width: 50%;
            height: 100%;
        }

        #event-log-header {
            height: 2;
            background: #2a2b36;
            content-align: center middle;
            border-bottom: solid rgb(91, 164, 91);
        }

        #event-log-content {
            height: 100%;
        }

        Static {
            color: white;
        }
    """

    # Set once the user presses SPACE; gates the start of microphone capture.
    should_send_audio: asyncio.Event
    # Set via set_is_connected(); capture_audio() waits on it before starting.
    connected: asyncio.Event
    # Not read anywhere in this class; callers may assign it.
    last_audio_item_id: str | None
    # Coroutine function that receives each recorded chunk as raw bytes.
    audio_callback: Callable[[bytes], Coroutine[Any, Any, None]] | None

    def __init__(self) -> None:
        """Create the output stream and initialise all declared attributes."""
        super().__init__()
        self.audio_player = sd.OutputStream(
            samplerate=SAMPLE_RATE,
            channels=CHANNELS,
            dtype=FORMAT,
        )
        self.should_send_audio = asyncio.Event()
        self.connected = asyncio.Event()
        # Initialised here so that reading the attribute on a fresh instance
        # does not raise AttributeError (it was previously only annotated).
        self.last_audio_item_id = None
        self.audio_callback = None

    @override
    def compose(self) -> ComposeResult:
        """Yield the widget tree: banner, status line, transcript and event log."""
        with Container():
            yield Header(id="session-display")
            yield AudioStatusIndicator(id="status-indicator")
            with Container(id="bottom-pane"):
                with Horizontal():
                    with Container(id="transcripts"):
                        yield Static("Conversation transcript", id="transcripts-header")
                        yield RichLog(
                            id="transcripts-content", wrap=True, highlight=True, markup=True
                        )
                    with Container(id="event-log"):
                        yield Static("Raw event log", id="event-log-header")
                        yield RichLog(
                            id="event-log-content", wrap=True, highlight=True, markup=True
                        )

    def set_is_connected(self, is_connected: bool) -> None:
        """Set or clear the ``connected`` event according to *is_connected*."""
        if is_connected:
            self.connected.set()
        else:
            self.connected.clear()

    def set_audio_callback(self, callback: Callable[[bytes], Coroutine[Any, Any, None]]) -> None:
        """Set a callback function to be called when audio is recorded."""
        self.audio_callback = callback

    # High-level methods for UI operations
    def set_header_text(self, text: str) -> None:
        """Update the header text.

        NOTE(review): ``Header.render`` returns a fixed title, so the text
        passed here is presumably never displayed — confirm.
        """
        header = self.query_one("#session-display", Header)
        header.update(text)

    def set_recording_status(self, is_recording: bool) -> None:
        """Set the recording status indicator."""
        status_indicator = self.query_one(AudioStatusIndicator)
        status_indicator.is_recording = is_recording

    def log_message(self, message: str) -> None:
        """Add a message to the event log; silently do nothing on failure."""
        try:
            log_pane = self.query_one("#event-log-content", RichLog)
            log_pane.write(message)
        except Exception:
            # Deliberate best-effort: the widget might not be available
            # (callers use this method from their own error handlers).
            pass

    def add_transcript(self, message: str) -> None:
        """Add a message to the transcript pane; silently do nothing on failure."""
        try:
            transcript_pane = self.query_one("#transcripts-content", RichLog)
            transcript_pane.write(message)
        except Exception:
            # Deliberate best-effort: the widget might not be available.
            pass

    def play_audio(self, audio_data: npt.NDArray[np.int16]) -> None:
        """Write audio samples to the output stream, logging any failure."""
        try:
            self.audio_player.write(audio_data)
        except Exception as e:
            self.log_message(f"Audio play error: {e}")

    async def on_mount(self) -> None:
        """Start the audio output stream and the audio capture worker."""
        self.audio_player.start()
        self.run_worker(self.capture_audio())

    async def capture_audio(self) -> None:
        """Capture audio from the microphone and pass it to the audio callback.

        Waits for ``connected`` and then for ``should_send_audio`` before the
        input stream is started. Afterwards it loops indefinitely, reading
        fixed-size chunks and awaiting ``audio_callback`` with their bytes.
        Errors are written to the event log; the input stream is always
        stopped (if active) and closed on the way out.
        """
        # Wait for connection to be established
        await self.connected.wait()

        # Set up audio input stream
        stream = sd.InputStream(
            channels=CHANNELS,
            samplerate=SAMPLE_RATE,
            dtype=FORMAT,
        )

        try:
            # Wait for user to press spacebar to start
            await self.should_send_audio.wait()

            stream.start()
            self.set_recording_status(True)
            self.log_message("Recording started - speak to the agent")

            # Chunk size in samples (SAMPLE_RATE * CHUNK_LENGTH_S)
            read_size = int(SAMPLE_RATE * CHUNK_LENGTH_S)

            while True:
                # Only read once a full chunk is buffered; otherwise sleep
                # briefly so the event loop keeps running.
                if stream.read_available < read_size:
                    await asyncio.sleep(0.01)  # Small sleep to avoid CPU hogging
                    continue

                # Read audio data; the second return value is ignored
                data, _ = stream.read(read_size)

                # Convert numpy array to bytes
                audio_bytes = data.tobytes()

                # Call audio callback if set
                if self.audio_callback:
                    try:
                        await self.audio_callback(audio_bytes)
                    except Exception as e:
                        self.log_message(f"Audio callback error: {e}")

                # Yield control back to event loop
                await asyncio.sleep(0)

        except Exception as e:
            self.log_message(f"Audio capture error: {e}")
        finally:
            if stream.active:
                stream.stop()
            stream.close()

    async def on_key(self, event: events.Key) -> None:
        """Handle key presses: ``q`` quits, ``space`` starts the conversation."""
        # add the keypress to the log
        self.log_message(f"Key pressed: {event.key}")

        if event.key == "q":
            # Shut down playback before leaving the app.
            self.audio_player.stop()
            self.audio_player.close()
            self.exit()
            return

        if event.key == "space":  # Spacebar
            # Only the first press has an effect; later presses are ignored.
            if not self.should_send_audio.is_set():
                self.should_send_audio.set()
                self.set_recording_status(True)

src/agents/realtime/openai_realtime.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,9 @@ async def _handle_ws_event(self, event: dict[str, Any]):
231231
).validate_python(event)
232232
except Exception as e:
233233
logger.error(f"Invalid event: {event} - {e}")
234-
await self._emit_event(RealtimeTransportErrorEvent(error=f"Invalid event: {event}"))
234+
await self._emit_event(
235+
RealtimeTransportErrorEvent(error=f"Invalid event: {event} - {e}")
236+
)
235237
return
236238

237239
if parsed.type == "response.audio.delta":

0 commit comments

Comments
 (0)