diff --git a/examples/mcp/streamablehttp_example/README.md b/examples/mcp/streamablehttp_example/README.md new file mode 100644 index 00000000..a07fe19b --- /dev/null +++ b/examples/mcp/streamablehttp_example/README.md @@ -0,0 +1,13 @@ +# MCP Streamable HTTP Example + +This example uses a local Streamable HTTP server in [server.py](server.py). + +Run the example via: + +``` +uv run python examples/mcp/streamablehttp_example/main.py +``` + +## Details + +The example uses the `MCPServerStreamableHttp` class from `agents.mcp`. The server runs in a sub-process at `https://localhost:8000/mcp`. diff --git a/examples/mcp/streamablehttp_example/main.py b/examples/mcp/streamablehttp_example/main.py new file mode 100644 index 00000000..cc95e798 --- /dev/null +++ b/examples/mcp/streamablehttp_example/main.py @@ -0,0 +1,83 @@ +import asyncio +import os +import shutil +import subprocess +import time +from typing import Any + +from agents import Agent, Runner, gen_trace_id, trace +from agents.mcp import MCPServer, MCPServerStreamableHttp +from agents.model_settings import ModelSettings + + +async def run(mcp_server: MCPServer): + agent = Agent( + name="Assistant", + instructions="Use the tools to answer the questions.", + mcp_servers=[mcp_server], + model_settings=ModelSettings(tool_choice="required"), + ) + + # Use the `add` tool to add two numbers + message = "Add these numbers: 7 and 22." + print(f"Running: {message}") + result = await Runner.run(starting_agent=agent, input=message) + print(result.final_output) + + # Run the `get_weather` tool + message = "What's the weather in Tokyo?" + print(f"\n\nRunning: {message}") + result = await Runner.run(starting_agent=agent, input=message) + print(result.final_output) + + # Run the `get_secret_word` tool + message = "What's the secret word?" + print(f"\n\nRunning: {message}") + result = await Runner.run(starting_agent=agent, input=message) + print(result.final_output) + + +async def main(): + async with MCPServerStreamableHttp( + name="Streamable HTTP Python Server", + params={ + "url": "http://localhost:8000/mcp", + }, + ) as server: + trace_id = gen_trace_id() + with trace(workflow_name="Streamable HTTP Example", trace_id=trace_id): + print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}\n") + await run(server) + + +if __name__ == "__main__": + # Let's make sure the user has uv installed + if not shutil.which("uv"): + raise RuntimeError( + "uv is not installed. Please install it: https://docs.astral.sh/uv/getting-started/installation/" + ) + + # We'll run the Streamable HTTP server in a subprocess. Usually this would be a remote server, but for this + # demo, we'll run it locally at http://localhost:8000/mcp + process: subprocess.Popen[Any] | None = None + try: + this_dir = os.path.dirname(os.path.abspath(__file__)) + server_file = os.path.join(this_dir, "server.py") + + print("Starting Streamable HTTP server at http://localhost:8000/mcp ...") + + # Run `uv run server.py` to start the Streamable HTTP server + process = subprocess.Popen(["uv", "run", server_file]) + # Give it 3 seconds to start + time.sleep(3) + + print("Streamable HTTP server started. Running example...\n\n") + except Exception as e: + print(f"Error starting Streamable HTTP server: {e}") + exit(1) + + try: + asyncio.run(main()) + finally: + if process: + process.terminate() diff --git a/examples/mcp/streamablehttp_example/server.py b/examples/mcp/streamablehttp_example/server.py new file mode 100644 index 00000000..d8f83965 --- /dev/null +++ b/examples/mcp/streamablehttp_example/server.py @@ -0,0 +1,33 @@ +import random + +import requests +from mcp.server.fastmcp import FastMCP + +# Create server +mcp = FastMCP("Echo Server") + + +@mcp.tool() +def add(a: int, b: int) -> int: + """Add two numbers""" + print(f"[debug-server] add({a}, {b})") + return a + b + + +@mcp.tool() +def get_secret_word() -> str: + print("[debug-server] get_secret_word()") + return random.choice(["apple", "banana", "cherry"]) + + +@mcp.tool() +def get_current_weather(city: str) -> str: + print(f"[debug-server] get_current_weather({city})") + + endpoint = "https://wttr.in" + response = requests.get(f"{endpoint}/{city}") + return response.text + + +if __name__ == "__main__": + mcp.run(transport="streamable-http") diff --git a/examples/research_bot/agents/search_agent.py b/examples/research_bot/agents/search_agent.py index 0212ce5b..61f91701 100644 --- a/examples/research_bot/agents/search_agent.py +++ b/examples/research_bot/agents/search_agent.py @@ -3,7 +3,7 @@ INSTRUCTIONS = ( "You are a research assistant. Given a search term, you search the web for that term and " - "produce a concise summary of the results. The summary must 2-3 paragraphs and less than 300 " + "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 " "words. Capture the main points. Write succinctly, no need to have complete sentences or good " "grammar. This will be consumed by someone synthesizing a report, so its vital you capture the " "essence and ignore any fluff. Do not include any additional commentary other than the summary " diff --git a/pyproject.toml b/pyproject.toml index 22b028ae..672258c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai-agents" -version = "0.0.14" +version = "0.0.15" description = "OpenAI Agents SDK" readme = "README.md" requires-python = ">=3.9" @@ -13,7 +13,7 @@ dependencies = [ "typing-extensions>=4.12.2, <5", "requests>=2.0, <3", "types-requests>=2.0, <3", - "mcp>=1.6.0, <2; python_version >= '3.10'", + "mcp>=1.8.0, <2; python_version >= '3.10'", ] classifiers = [ "Typing :: Typed", diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py index dc672acd..d3b25a19 100644 --- a/src/agents/extensions/models/litellm_model.py +++ b/src/agents/extensions/models/litellm_model.py @@ -269,6 +269,8 @@ async def _fetch_response( extra_kwargs["extra_query"] = model_settings.extra_query if model_settings.metadata: extra_kwargs["metadata"] = model_settings.metadata + if model_settings.extra_body and isinstance(model_settings.extra_body, dict): + extra_kwargs.update(model_settings.extra_body) ret = await litellm.acompletion( model=self.model, diff --git a/src/agents/mcp/__init__.py b/src/agents/mcp/__init__.py index 1a72a89f..d4eb8fa6 100644 --- a/src/agents/mcp/__init__.py +++ b/src/agents/mcp/__init__.py @@ -5,6 +5,8 @@ MCPServerSseParams, MCPServerStdio, MCPServerStdioParams, + MCPServerStreamableHttp, + MCPServerStreamableHttpParams, ) except ImportError: pass @@ -17,5 +19,7 @@ "MCPServerSseParams", "MCPServerStdio", "MCPServerStdioParams", + "MCPServerStreamableHttp", + "MCPServerStreamableHttpParams", "MCPUtil", ] diff --git a/src/agents/mcp/server.py b/src/agents/mcp/server.py index 9916c92b..c5255ead 100644 --- a/src/agents/mcp/server.py +++ b/src/agents/mcp/server.py @@ -10,7 +10,9 @@ from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream from mcp import ClientSession, StdioServerParameters, Tool as MCPTool, stdio_client from mcp.client.sse import sse_client -from mcp.types import CallToolResult, JSONRPCMessage +from mcp.client.streamable_http import GetSessionIdCallback, streamablehttp_client +from mcp.shared.message import SessionMessage +from mcp.types import CallToolResult from typing_extensions import NotRequired, TypedDict from ..exceptions import UserError @@ -83,8 +85,9 @@ def create_streams( self, ) -> AbstractAsyncContextManager[ tuple[ - MemoryObjectReceiveStream[JSONRPCMessage | Exception], - MemoryObjectSendStream[JSONRPCMessage], + MemoryObjectReceiveStream[SessionMessage | Exception], + MemoryObjectSendStream[SessionMessage], + GetSessionIdCallback | None ] ]: """Create the streams for the server.""" @@ -105,7 +108,11 @@ async def connect(self): """Connect to the server.""" try: transport = await self.exit_stack.enter_async_context(self.create_streams()) - read, write = transport + # streamablehttp_client returns (read, write, get_session_id) + # sse_client returns (read, write) + + read, write, *_ = transport + session = await self.exit_stack.enter_async_context( ClientSession( read, @@ -232,8 +239,9 @@ def create_streams( self, ) -> AbstractAsyncContextManager[ tuple[ - MemoryObjectReceiveStream[JSONRPCMessage | Exception], - MemoryObjectSendStream[JSONRPCMessage], + MemoryObjectReceiveStream[SessionMessage | Exception], + MemoryObjectSendStream[SessionMessage], + GetSessionIdCallback | None ] ]: """Create the streams for the server.""" @@ -302,8 +310,9 @@ def create_streams( self, ) -> AbstractAsyncContextManager[ tuple[ - MemoryObjectReceiveStream[JSONRPCMessage | Exception], - MemoryObjectSendStream[JSONRPCMessage], + MemoryObjectReceiveStream[SessionMessage | Exception], + MemoryObjectSendStream[SessionMessage], + GetSessionIdCallback | None ] ]: """Create the streams for the server.""" @@ -318,3 +327,84 @@ def create_streams( def name(self) -> str: """A readable name for the server.""" return self._name + + +class MCPServerStreamableHttpParams(TypedDict): + """Mirrors the params in`mcp.client.streamable_http.streamablehttp_client`.""" + + url: str + """The URL of the server.""" + + headers: NotRequired[dict[str, str]] + """The headers to send to the server.""" + + timeout: NotRequired[timedelta] + """The timeout for the HTTP request. Defaults to 5 seconds.""" + + sse_read_timeout: NotRequired[timedelta] + """The timeout for the SSE connection, in seconds. Defaults to 5 minutes.""" + + terminate_on_close: NotRequired[bool] + """Terminate on close""" + + +class MCPServerStreamableHttp(_MCPServerWithClientSession): + """MCP server implementation that uses the Streamable HTTP transport. See the [spec] + (https://modelcontextprotocol.io/specification/2025-03-26/basic/transports#streamable-http) + for details. + """ + + def __init__( + self, + params: MCPServerStreamableHttpParams, + cache_tools_list: bool = False, + name: str | None = None, + client_session_timeout_seconds: float | None = 5, + ): + """Create a new MCP server based on the Streamable HTTP transport. + + Args: + params: The params that configure the server. This includes the URL of the server, + the headers to send to the server, the timeout for the HTTP request, and the + timeout for the Streamable HTTP connection and whether we need to + terminate on close. + + cache_tools_list: Whether to cache the tools list. If `True`, the tools list will be + cached and only fetched from the server once. If `False`, the tools list will be + fetched from the server on each call to `list_tools()`. The cache can be + invalidated by calling `invalidate_tools_cache()`. You should set this to `True` + if you know the server will not change its tools list, because it can drastically + improve latency (by avoiding a round-trip to the server every time). + + name: A readable name for the server. If not provided, we'll create one from the + URL. + + client_session_timeout_seconds: the read timeout passed to the MCP ClientSession. + """ + super().__init__(cache_tools_list, client_session_timeout_seconds) + + self.params = params + self._name = name or f"streamable_http: {self.params['url']}" + + def create_streams( + self, + ) -> AbstractAsyncContextManager[ + tuple[ + MemoryObjectReceiveStream[SessionMessage | Exception], + MemoryObjectSendStream[SessionMessage], + GetSessionIdCallback | None + ] + ]: + """Create the streams for the server.""" + return streamablehttp_client( + url=self.params["url"], + headers=self.params.get("headers", None), + timeout=self.params.get("timeout", timedelta(seconds=30)), + sse_read_timeout=self.params.get("sse_read_timeout", timedelta(seconds=60 * 5)), + terminate_on_close=self.params.get("terminate_on_close", True) + ) + + @property + def name(self) -> str: + """A readable name for the server.""" + return self._name diff --git a/src/agents/models/chatcmpl_converter.py b/src/agents/models/chatcmpl_converter.py index 613a3745..1d599e8c 100644 --- a/src/agents/models/chatcmpl_converter.py +++ b/src/agents/models/chatcmpl_converter.py @@ -234,7 +234,7 @@ def extract_all_content( type="image_url", image_url={ "url": casted_image_param["image_url"], - "detail": casted_image_param["detail"], + "detail": casted_image_param.get("detail", "auto"), }, ) ) diff --git a/tests/models/test_litellm_extra_body.py b/tests/models/test_litellm_extra_body.py new file mode 100644 index 00000000..ac56c25c --- /dev/null +++ b/tests/models/test_litellm_extra_body.py @@ -0,0 +1,45 @@ +import litellm +import pytest +from litellm.types.utils import Choices, Message, ModelResponse, Usage + +from agents.extensions.models.litellm_model import LitellmModel +from agents.model_settings import ModelSettings +from agents.models.interface import ModelTracing + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_extra_body_is_forwarded(monkeypatch): + """ + Forward `extra_body` entries into litellm.acompletion kwargs. + + This ensures that user-provided parameters (e.g. cached_content) + arrive alongside default arguments. + """ + captured: dict[str, object] = {} + + async def fake_acompletion(model, messages=None, **kwargs): + captured.update(kwargs) + msg = Message(role="assistant", content="ok") + choice = Choices(index=0, message=msg) + return ModelResponse(choices=[choice], usage=Usage(0, 0, 0)) + + monkeypatch.setattr(litellm, "acompletion", fake_acompletion) + settings = ModelSettings( + temperature=0.1, + extra_body={"cached_content": "some_cache", "foo": 123} + ) + model = LitellmModel(model="test-model") + + await model.get_response( + system_instructions=None, + input=[], + model_settings=settings, + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + ) + + assert {"cached_content": "some_cache", "foo": 123}.items() <= captured.items() diff --git a/uv.lock b/uv.lock index 87dd3cd2..6ccc1996 100644 --- a/uv.lock +++ b/uv.lock @@ -1047,7 +1047,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.6.0" +version = "1.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio", marker = "python_full_version >= '3.10'" }, @@ -1055,13 +1055,14 @@ dependencies = [ { name = "httpx-sse", marker = "python_full_version >= '3.10'" }, { name = "pydantic", marker = "python_full_version >= '3.10'" }, { name = "pydantic-settings", marker = "python_full_version >= '3.10'" }, + { name = "python-multipart", marker = "python_full_version >= '3.10'" }, { name = "sse-starlette", marker = "python_full_version >= '3.10'" }, { name = "starlette", marker = "python_full_version >= '3.10'" }, - { name = "uvicorn", marker = "python_full_version >= '3.10'" }, + { name = "uvicorn", marker = "python_full_version >= '3.10' and sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/95/d2/f587cb965a56e992634bebc8611c5b579af912b74e04eb9164bd49527d21/mcp-1.6.0.tar.gz", hash = "sha256:d9324876de2c5637369f43161cd71eebfd803df5a95e46225cab8d280e366723", size = 200031 } +sdist = { url = "https://files.pythonhosted.org/packages/7c/13/16b712e8a3be6a736b411df2fc6b4e75eb1d3e99b1cd57a3a1decf17f612/mcp-1.8.1.tar.gz", hash = "sha256:ec0646271d93749f784d2316fb5fe6102fb0d1be788ec70a9e2517e8f2722c0e", size = 265605 } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/30/20a7f33b0b884a9d14dd3aa94ff1ac9da1479fe2ad66dd9e2736075d2506/mcp-1.6.0-py3-none-any.whl", hash = "sha256:7bd24c6ea042dbec44c754f100984d186620d8b841ec30f1b19eda9b93a634d0", size = 76077 }, + { url = "https://files.pythonhosted.org/packages/1c/5d/91cf0d40e40ae9ecf8d4004e0f9611eea86085aa0b5505493e0ff53972da/mcp-1.8.1-py3-none-any.whl", hash = "sha256:948e03783859fa35abe05b9b6c0a1d5519be452fc079dc8d7f682549591c1770", size = 119761 }, ] [[package]] @@ -1533,7 +1534,7 @@ requires-dist = [ { name = "graphviz", marker = "extra == 'viz'", specifier = ">=0.17" }, { name = "griffe", specifier = ">=1.5.6,<2" }, { name = "litellm", marker = "extra == 'litellm'", specifier = ">=1.67.4.post1,<2" }, - { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.6.0,<2" }, + { name = "mcp", marker = "python_full_version >= '3.10'", specifier = ">=1.8.0,<2" }, { name = "numpy", marker = "python_full_version >= '3.10' and extra == 'voice'", specifier = ">=2.2.0,<3" }, { name = "openai", specifier = ">=1.76.0" }, { name = "pydantic", specifier = ">=2.10,<3" }, @@ -2085,6 +2086,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256 }, ] +[[package]] +name = "python-multipart" +version = "0.0.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546 }, +] + [[package]] name = "python-xlib" version = "0.33"