Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 3a64c6f

Browse files
authored
[8/n] Make realtime more like the rest of agents sdk (openai#1076)
Key changes:

1. Transport -> model.
2. Extract any model settings into `RealtimeSessionModelSettings`.
3. Add `RealtimeRunConfig`, similar to the `RunConfig` in `run.py`.
4. `RealtimeRunner` now exists, similar to `Runner`. It returns a `RealtimeSession` when you call `run()`.
5. `RealtimeSession` now uses streaming events instead of a listener.

---

[//]: # (BEGIN SAPLING FOOTER)

* openai#1080
* openai#1079
* __->__ openai#1076
1 parent c078c69 commit 3a64c6f

File tree

12 files changed

+532
-428
lines changed

12 files changed

+532
-428
lines changed

.vscode/launch.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Python Debugger: Python File",
9+
"type": "debugpy",
10+
"request": "launch",
11+
"program": "${file}"
12+
}
13+
]
14+
}

examples/realtime/demo.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55

66
import numpy as np
77

8+
from agents.realtime import RealtimeSession
9+
810
# Add the current directory to path so we can import ui
911
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
1012

1113
from agents import function_tool
12-
from agents.realtime import RealtimeAgent, RealtimeSession, RealtimeSessionEvent
14+
from agents.realtime import RealtimeAgent, RealtimeRunner, RealtimeSessionEvent
1315

1416
if TYPE_CHECKING:
1517
from .ui import AppUI
@@ -38,23 +40,34 @@ def get_weather(city: str) -> str:
3840

3941
class Example:
4042
def __init__(self) -> None:
41-
self.session = RealtimeSession(agent)
4243
self.ui = AppUI()
4344
self.ui.connected = asyncio.Event()
4445
self.ui.last_audio_item_id = None
4546
# Set the audio callback
4647
self.ui.set_audio_callback(self.on_audio_recorded)
4748

49+
self.session: RealtimeSession | None = None
50+
4851
async def run(self) -> None:
49-
self.session.add_listener(self.on_event)
50-
await self.session.connect()
51-
self.ui.set_is_connected(True)
52-
await self.ui.run_async()
52+
# Start UI in a separate task instead of waiting for it to complete
53+
ui_task = asyncio.create_task(self.ui.run_async())
54+
55+
# Set up session immediately without waiting for UI to finish
56+
runner = RealtimeRunner(agent)
57+
async with await runner.run() as session:
58+
self.session = session
59+
self.ui.set_is_connected(True)
60+
async for event in session:
61+
await self.on_event(event)
62+
63+
# Wait for UI task to complete when session ends
64+
await ui_task
5365

5466
async def on_audio_recorded(self, audio_bytes: bytes) -> None:
5567
"""Called when audio is recorded by the UI."""
5668
try:
5769
# Send the audio to the session
70+
assert self.session is not None
5871
await self.session.send_audio(audio_bytes)
5972
except Exception as e:
6073
self.ui.log_message(f"Error sending audio: {e}")
@@ -87,8 +100,8 @@ async def on_event(self, event: RealtimeSessionEvent) -> None:
87100
pass
88101
elif event.type == "history_added":
89102
pass
90-
elif event.type == "raw_transport_event":
91-
self.ui.log_message(f"Raw transport event: {event.data}")
103+
elif event.type == "raw_model_event":
104+
self.ui.log_message(f"Raw model event: {event.data}")
92105
else:
93106
self.ui.log_message(f"Unknown event type: {event.type}")
94107
except Exception as e:

src/agents/realtime/__init__.py

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
2-
from .config import APIKeyOrKeyFunc
2+
from .config import (
3+
RealtimeAudioFormat,
4+
RealtimeClientMessage,
5+
RealtimeInputAudioTranscriptionConfig,
6+
RealtimeModelName,
7+
RealtimeRunConfig,
8+
RealtimeSessionModelSettings,
9+
RealtimeTurnDetectionConfig,
10+
RealtimeUserInput,
11+
RealtimeUserInputMessage,
12+
RealtimeUserInputText,
13+
)
314
from .events import (
415
RealtimeAgentEndEvent,
516
RealtimeAgentStartEvent,
@@ -10,42 +21,49 @@
1021
RealtimeHandoffEvent,
1122
RealtimeHistoryAdded,
1223
RealtimeHistoryUpdated,
13-
RealtimeRawTransportEvent,
24+
RealtimeRawModelEvent,
1425
RealtimeSessionEvent,
1526
RealtimeToolEnd,
1627
RealtimeToolStart,
1728
)
18-
from .session import RealtimeSession
19-
from .transport import (
20-
RealtimeModelName,
21-
RealtimeSessionTransport,
22-
RealtimeTransportConnectionOptions,
23-
RealtimeTransportListener,
29+
from .model import (
30+
RealtimeModel,
31+
RealtimeModelConfig,
32+
RealtimeModelListener,
2433
)
34+
from .runner import RealtimeRunner
35+
from .session import RealtimeSession
2536

2637
__all__ = [
2738
"RealtimeAgent",
2839
"RealtimeAgentHooks",
2940
"RealtimeRunHooks",
30-
"RealtimeSession",
31-
"RealtimeSessionListener",
32-
"RealtimeSessionListenerFunc",
33-
"APIKeyOrKeyFunc",
41+
"RealtimeRunner",
42+
"RealtimeRunConfig",
43+
"RealtimeSessionModelSettings",
44+
"RealtimeInputAudioTranscriptionConfig",
45+
"RealtimeTurnDetectionConfig",
46+
"RealtimeAudioFormat",
47+
"RealtimeClientMessage",
48+
"RealtimeUserInput",
49+
"RealtimeUserInputMessage",
50+
"RealtimeUserInputText",
3451
"RealtimeModelName",
35-
"RealtimeSessionTransport",
36-
"RealtimeTransportListener",
37-
"RealtimeTransportConnectionOptions",
52+
"RealtimeModel",
53+
"RealtimeModelListener",
54+
"RealtimeModelConfig",
3855
"RealtimeSessionEvent",
3956
"RealtimeAgentStartEvent",
4057
"RealtimeAgentEndEvent",
4158
"RealtimeHandoffEvent",
4259
"RealtimeToolStart",
4360
"RealtimeToolEnd",
44-
"RealtimeRawTransportEvent",
61+
"RealtimeRawModelEvent",
4562
"RealtimeAudioEnd",
4663
"RealtimeAudio",
4764
"RealtimeAudioInterrupted",
4865
"RealtimeError",
4966
"RealtimeHistoryUpdated",
5067
"RealtimeHistoryAdded",
68+
"RealtimeSession",
5169
]

src/agents/realtime/config.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,44 @@
11
from __future__ import annotations
22

3-
import inspect
43
from typing import (
54
Any,
6-
Callable,
75
Literal,
86
Union,
97
)
108

119
from typing_extensions import NotRequired, TypeAlias, TypedDict
1210

1311
from ..model_settings import ToolChoice
14-
from ..tool import FunctionTool
15-
from ..util._types import MaybeAwaitable
12+
from ..tool import Tool
13+
14+
RealtimeModelName: TypeAlias = Union[
15+
Literal[
16+
"gpt-4o-realtime-preview",
17+
"gpt-4o-mini-realtime-preview",
18+
"gpt-4o-realtime-preview-2025-06-03",
19+
"gpt-4o-realtime-preview-2024-12-17",
20+
"gpt-4o-realtime-preview-2024-10-01",
21+
"gpt-4o-mini-realtime-preview-2024-12-17",
22+
],
23+
str,
24+
]
25+
"""The name of a realtime model."""
1626

1727

1828
class RealtimeClientMessage(TypedDict):
1929
type: str # explicitly required
2030
other_data: NotRequired[dict[str, Any]]
2131

2232

23-
class UserInputText(TypedDict):
33+
class RealtimeUserInputText(TypedDict):
2434
type: Literal["input_text"]
2535
text: str
2636

2737

2838
class RealtimeUserInputMessage(TypedDict):
2939
type: Literal["message"]
3040
role: Literal["user"]
31-
content: list[UserInputText]
41+
content: list[RealtimeUserInputText]
3242

3343

3444
RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
@@ -55,9 +65,11 @@ class RealtimeTurnDetectionConfig(TypedDict):
5565
threshold: NotRequired[float]
5666

5767

58-
class RealtimeSessionConfig(TypedDict):
59-
api_key: NotRequired[APIKeyOrKeyFunc]
60-
model: NotRequired[str]
68+
class RealtimeSessionModelSettings(TypedDict):
69+
"""Model settings for a realtime model session."""
70+
71+
model_name: NotRequired[RealtimeModelName]
72+
6173
instructions: NotRequired[str]
6274
modalities: NotRequired[list[Literal["text", "audio"]]]
6375
voice: NotRequired[str]
@@ -68,24 +80,13 @@ class RealtimeSessionConfig(TypedDict):
6880
turn_detection: NotRequired[RealtimeTurnDetectionConfig]
6981

7082
tool_choice: NotRequired[ToolChoice]
71-
tools: NotRequired[list[FunctionTool]]
72-
73-
74-
APIKeyOrKeyFunc = str | Callable[[], MaybeAwaitable[str]]
75-
"""Either an API key or a function that returns an API key."""
76-
83+
tools: NotRequired[list[Tool]]
7784

78-
async def get_api_key(key: APIKeyOrKeyFunc | None) -> str | None:
79-
"""Get the API key from the key or key function."""
80-
if key is None:
81-
return None
82-
elif isinstance(key, str):
83-
return key
8485

85-
result = key()
86-
if inspect.isawaitable(result):
87-
return await result
88-
return result
86+
class RealtimeRunConfig(TypedDict):
87+
model_settings: NotRequired[RealtimeSessionModelSettings]
8988

9089
# TODO (rm) Add tracing support
9190
# tracing: NotRequired[RealtimeTracingConfig | None]
91+
# TODO (rm) Add guardrail support
92+
# TODO (rm) Add history audio storage config

src/agents/realtime/events.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from dataclasses import dataclass
24
from typing import Any, Literal, Union
35

@@ -7,7 +9,7 @@
79
from ..tool import Tool
810
from .agent import RealtimeAgent
911
from .items import RealtimeItem
10-
from .transport_events import RealtimeTransportAudioEvent, RealtimeTransportEvent
12+
from .model_events import RealtimeModelAudioEvent, RealtimeModelEvent
1113

1214

1315
@dataclass
@@ -93,16 +95,16 @@ class RealtimeToolEnd:
9395

9496

9597
@dataclass
96-
class RealtimeRawTransportEvent:
97-
"""Forwards raw events from the transport layer."""
98+
class RealtimeRawModelEvent:
99+
"""Forwards raw events from the model layer."""
98100

99-
data: RealtimeTransportEvent
100-
"""The raw data from the transport layer."""
101+
data: RealtimeModelEvent
102+
"""The raw data from the model layer."""
101103

102104
info: RealtimeEventInfo
103105
"""Common info for all events, such as the context."""
104106

105-
type: Literal["raw_transport_event"] = "raw_transport_event"
107+
type: Literal["raw_model_event"] = "raw_model_event"
106108

107109

108110
@dataclass
@@ -119,8 +121,8 @@ class RealtimeAudioEnd:
119121
class RealtimeAudio:
120122
"""Triggered when the agent generates new audio to be played."""
121123

122-
audio: RealtimeTransportAudioEvent
123-
"""The audio event from the transport layer."""
124+
audio: RealtimeModelAudioEvent
125+
"""The audio event from the model layer."""
124126

125127
info: RealtimeEventInfo
126128
"""Common info for all events, such as the context."""
@@ -187,7 +189,7 @@ class RealtimeHistoryAdded:
187189
RealtimeHandoffEvent,
188190
RealtimeToolStart,
189191
RealtimeToolEnd,
190-
RealtimeRawTransportEvent,
192+
RealtimeRawModelEvent,
191193
RealtimeAudioEnd,
192194
RealtimeAudio,
193195
RealtimeAudioInterrupted,

0 commit comments

Comments
 (0)