Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 78675ff

Browse files
authored
[3/n] Config and items for realtime (openai#1070)
Similar to the TS version. - Config holds the settings you can set on the session. - Items are similar to Responses items. I'm using our own abstraction instead of reusing the item types from the openai SDK, to reduce the amount of work for other providers. --- [//]: # (BEGIN SAPLING FOOTER) * openai#1074 * openai#1073 * openai#1072 * openai#1071 * __->__ openai#1070 * openai#1069 * openai#1068
1 parent 9222bee commit 78675ff

File tree

3 files changed

+193
-1
lines changed

3 files changed

+193
-1
lines changed

src/agents/model_settings.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def validate_from_none(value: None) -> _Omit:
4545

4646
Omit = Annotated[_Omit, _OmitTypeAnnotation]
4747
Headers: TypeAlias = Mapping[str, Union[str, Omit]]
48+
# Tool-choice value: one of the literal modes, any other string, or None.
# NOTE(review): the free-form string is presumably a specific tool name -- confirm.
ToolChoice: TypeAlias = Union[Literal["auto", "required", "none"], str, None]
4849

4950

5051
@dataclass
@@ -70,7 +71,7 @@ class ModelSettings:
7071
presence_penalty: float | None = None
7172
"""The presence penalty to use when calling the model."""
7273

73-
tool_choice: Literal["auto", "required", "none"] | str | None = None
74+
tool_choice: ToolChoice | None = None
7475
"""The tool choice to use when calling the model."""
7576

7677
parallel_tool_calls: bool | None = None

src/agents/realtime/config.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from __future__ import annotations
2+
3+
import inspect
4+
from typing import (
5+
Any,
6+
Callable,
7+
Literal,
8+
Union,
9+
)
10+
11+
from typing_extensions import NotRequired, TypeAlias, TypedDict
12+
13+
from ..model_settings import ToolChoice
14+
from ..tool import FunctionTool
15+
from ..util._types import MaybeAwaitable
16+
17+
18+
class RealtimeClientMessage(TypedDict):
    """A client message identified by its ``type``, with an optional payload.

    NOTE(review): presumably a raw message sent to the realtime model -- confirm.
    """

    # Always required.
    type: str  # explicitly required
    # Optional free-form payload accompanying the message.
    other_data: NotRequired[dict[str, Any]]
21+
22+
23+
class UserInputText(TypedDict):
    """A text content part of a user message (``type`` is always ``"input_text"``)."""

    type: Literal["input_text"]
    text: str
26+
27+
28+
class RealtimeUserInputMessage(TypedDict):
    """A structured user message: ``type="message"``, ``role="user"``, text parts."""

    type: Literal["message"]
    role: Literal["user"]
    # One or more text parts making up the message body.
    content: list[UserInputText]
32+
33+
34+
# User input is either a plain string or a structured user message.
RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
35+
36+
37+
# Audio format name: one of the listed literals, or any other string.
RealtimeAudioFormat: TypeAlias = Union[Literal["pcm16", "g711_ulaw", "g711_alaw"], str]
38+
39+
40+
class RealtimeInputAudioTranscriptionConfig(TypedDict):
    """Settings for transcribing input audio. Every key is optional."""

    language: NotRequired[str]
    # One of the listed model names, or any other string.
    model: NotRequired[
        Union[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"], str]
    ]
    prompt: NotRequired[str]
44+
45+
46+
class RealtimeTurnDetectionConfig(TypedDict):
    """Turn detection config. Every key is optional.

    Only the keys declared here are part of the type: static type checkers
    reject undeclared keys on a ``TypedDict``, although nothing validates the
    dict at runtime. Vendor-specific keys must be added here to type-check.
    """

    type: NotRequired[Literal["semantic_vad", "server_vad"]]
    create_response: NotRequired[bool]
    eagerness: NotRequired[Literal["auto", "low", "medium", "high"]]
    interrupt_response: NotRequired[bool]
    prefix_padding_ms: NotRequired[int]
    silence_duration_ms: NotRequired[int]
    threshold: NotRequired[float]
56+
57+
58+
class RealtimeSessionConfig(TypedDict):
    """The settings that can be set on a realtime session. Every key is optional."""

    # Either the key itself or a callable that produces it.
    api_key: NotRequired[APIKeyOrKeyFunc]
    model: NotRequired[str]
    instructions: NotRequired[str]
    modalities: NotRequired[list[Literal["text", "audio"]]]
    voice: NotRequired[str]

    # Audio input/output settings.
    input_audio_format: NotRequired[RealtimeAudioFormat]
    output_audio_format: NotRequired[RealtimeAudioFormat]
    input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
    turn_detection: NotRequired[RealtimeTurnDetectionConfig]

    # Tool settings.
    tool_choice: NotRequired[ToolChoice]
    tools: NotRequired[list[FunctionTool]]
72+
73+
74+
# Spelled with ``Union`` rather than ``|``: this alias is a runtime expression
# (not a lazily-evaluated annotation), and ``str | Callable[...]`` raises
# TypeError at import time on Python versions before 3.10.
APIKeyOrKeyFunc: TypeAlias = Union[str, Callable[[], MaybeAwaitable[str]]]
"""Either an API key or a function that returns an API key."""
76+
77+
78+
async def get_api_key(key: APIKeyOrKeyFunc | None) -> str | None:
    """Resolve an API key from a literal string or a key-producing callable.

    Args:
        key: ``None``, the API key itself, or a zero-argument callable that
            returns the key either directly or as an awaitable.

    Returns:
        ``None`` when ``key`` is ``None``; otherwise the resolved key.
    """
    if key is None:
        return None
    if isinstance(key, str):
        return key

    # The callable may be sync or async: only await when it hands back an awaitable.
    result = key()
    if inspect.isawaitable(result):
        return await result
    return result
89+
90+
# TODO (rm) Add tracing support
91+
# tracing: NotRequired[RealtimeTracingConfig | None]

src/agents/realtime/items.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
from __future__ import annotations
2+
3+
from typing import Annotated, Literal, Union
4+
5+
from pydantic import BaseModel, ConfigDict, Field
6+
7+
8+
class InputText(BaseModel):
    """A text content part; ``type`` defaults to ``"input_text"``."""

    type: Literal["input_text"] = "input_text"
    text: str

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
14+
15+
16+
class InputAudio(BaseModel):
    """An audio content part; ``type`` defaults to ``"input_audio"``.

    Both the audio payload and its transcript are optional.
    NOTE(review): ``audio`` is presumably encoded audio data as text -- confirm the encoding.
    """

    type: Literal["input_audio"] = "input_audio"
    audio: str | None = None
    transcript: str | None = None

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
23+
24+
25+
class AssistantText(BaseModel):
    """A text content part of an assistant message; ``type`` defaults to ``"text"``."""

    type: Literal["text"] = "text"
    text: str

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
31+
32+
33+
class AssistantAudio(BaseModel):
    """An audio content part of an assistant message; ``type`` defaults to ``"audio"``.

    Both the audio payload and its transcript are optional.
    NOTE(review): ``audio`` is presumably encoded audio data as text -- confirm the encoding.
    """

    type: Literal["audio"] = "audio"
    audio: str | None = None
    transcript: str | None = None

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
40+
41+
42+
class SystemMessageItem(BaseModel):
    """A message item with ``role="system"`` whose content is a list of text parts."""

    item_id: str
    # NOTE(review): presumably the id of the item preceding this one -- confirm.
    previous_item_id: str | None = None
    type: Literal["message"] = "message"
    role: Literal["system"] = "system"
    content: list[InputText]

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
51+
52+
53+
class UserMessageItem(BaseModel):
    """A message item with ``role="user"`` whose content mixes text and audio parts."""

    item_id: str
    # NOTE(review): presumably the id of the item preceding this one -- confirm.
    previous_item_id: str | None = None
    type: Literal["message"] = "message"
    role: Literal["user"] = "user"
    content: list[InputText | InputAudio]

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
62+
63+
64+
class AssistantMessageItem(BaseModel):
    """A message item with ``role="assistant"`` whose content mixes text and audio parts."""

    item_id: str
    # NOTE(review): presumably the id of the item preceding this one -- confirm.
    previous_item_id: str | None = None
    type: Literal["message"] = "message"
    role: Literal["assistant"] = "assistant"
    # Optional; when set, one of the three listed states.
    status: Literal["in_progress", "completed", "incomplete"] | None = None
    content: list[AssistantText | AssistantAudio]

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
74+
75+
76+
# Any of the three message item models. ``role`` is the discriminator, so
# pydantic uses its value to select which model to validate against.
RealtimeMessageItem = Annotated[
    Union[SystemMessageItem, UserMessageItem, AssistantMessageItem],
    Field(discriminator="role"),
]
80+
81+
82+
class RealtimeToolCallItem(BaseModel):
    """A function-call item: the called name, its arguments, and an optional output."""

    item_id: str
    # NOTE(review): presumably the id of the item preceding this one -- confirm.
    previous_item_id: str | None = None
    type: Literal["function_call"] = "function_call"
    # Required; has no default.
    status: Literal["in_progress", "completed"]
    # NOTE(review): presumably a JSON-encoded argument object -- confirm.
    arguments: str
    name: str
    output: str | None = None

    # Keep fields that are not declared on the model instead of rejecting them.
    model_config = ConfigDict(extra="allow")
93+
94+
95+
# Spelled with ``Union`` rather than ``|``: this alias is a runtime expression,
# and ``|`` between an ``Annotated`` alias and a class raises TypeError at
# import time on Python versions before 3.10.
RealtimeItem = Union[RealtimeMessageItem, RealtimeToolCallItem]
96+
97+
98+
class RealtimeResponse(BaseModel):
    """A response identified by ``id`` that carries a list of message items."""

    id: str
    # Only message items are accepted here; tool-call items are not part of this type.
    output: list[RealtimeMessageItem]

0 commit comments

Comments
 (0)