diff --git a/examples/smolagents_structured_output.py b/examples/smolagents_structured_output.py
new file mode 100644
index 0000000..75599c2
--- /dev/null
+++ b/examples/smolagents_structured_output.py
@@ -0,0 +1,94 @@
+"""
+Minimal example showcasing the difference between using structured output
+and not using it with smolagents CodeAgent.
+
+This demonstrates how CodeAgent needs fewer iterations when it knows
+the output structure beforehand.
+"""
+
+import os
+from textwrap import dedent
+
+from dotenv import load_dotenv
+from mcp import StdioServerParameters
+from smolagents import CodeAgent, InferenceClientModel
+
+from mcpadapt.core import MCPAdapt
+from mcpadapt.smolagents_adapter import SmolAgentsAdapter
+
+# Load environment variables
+load_dotenv()
+
+# Minimal MCP server that returns product information
+server_script = dedent(
+    """
+    from mcp.server.fastmcp import FastMCP
+
+    mcp = FastMCP("Product Server")
+
+    @mcp.tool()
+    def get_product_info(product_id: str) -> dict:
+        '''Get detailed information about a product'''
+        # Simulate a product database lookup
+        products = {
+            "laptop-123": {
+                "name": "UltraBook Pro",
+                "price": 1299,
+                "stock": 15,
+                "specs": {
+                    "cpu": "Intel i7",
+                    "ram": "16GB",
+                    "storage": "512GB SSD"
+                }
+            }
+        }
+        return products.get(product_id, {"error": "Product not found"})
+
+    mcp.run()
+    """
+)
+
+# Both demos ask the exact same question so their runs are directly comparable
+PROMPT = (
+    "What is the RAM specification of laptop-123? "
+    "Return just the RAM value as a string."
+)
+
+
+def _run_demo(structured_output: bool) -> None:
+    """Run the shared prompt against the product server and print the answer.
+
+    Args:
+        structured_output: Forwarded to SmolAgentsAdapter to enable or disable
+            structured output support.
+    """
+    with MCPAdapt(
+        StdioServerParameters(
+            command="uv", args=["run", "python", "-c", server_script]
+        ),
+        SmolAgentsAdapter(structured_output=structured_output),
+    ) as tools:
+        model = InferenceClientModel(token=os.getenv("HF_TOKEN"))
+        agent = CodeAgent(tools=tools, model=model)
+
+        # Ask for specific nested information
+        result = agent.run(PROMPT)
+        print(f"Result: {result}")
+
+
+def demo_without_structured_output():
+    """Demo showing CodeAgent without structured output support."""
+    print("\n=== Demo WITHOUT structured output ===\n")
+    _run_demo(structured_output=False)
+
+
+def demo_with_structured_output():
+    """Demo showing CodeAgent with structured output support."""
+    print("\n=== Demo WITH structured output ===\n")
+    _run_demo(structured_output=True)
+
+
+if __name__ == "__main__":
+    # Run both demos
+    demo_without_structured_output()
+    demo_with_structured_output()
diff --git a/pyproject.toml b/pyproject.toml
index 85548d2..07a0934 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ requires-python = ">=3.10"
 license = "MIT"
 authors = [{ name = "Guillaume Raille", email = "guillaume.raille@gmail.com" }]
 dependencies = [
-    "mcp>=1.9.4",
+    "mcp>=1.10.1",
     "jsonref>=1.1.0",
     "python-dotenv>=1.0.1",
     "pydantic>=2.10.6",
diff --git a/src/mcpadapt/smolagents_adapter.py b/src/mcpadapt/smolagents_adapter.py
index 7fe4ed5..d027da3 100644
--- a/src/mcpadapt/smolagents_adapter.py
+++ b/src/mcpadapt/smolagents_adapter.py
@@ -3,12 +3,16 @@
 SmolAgents do not support async tools, so this adapter will only work with the sync
 context manager.
 
+To enable structured output support (MCP spec 2025-06-18+), use:
+SmolAgentsAdapter(structured_output=True)
+
 Example Usage:
 >>> with MCPAdapt(StdioServerParameters(command="uv", args=["run", "src/echo.py"]), SmolAgentsAdapter()) as tools:
 >>>     print(tools)
 """
 
 import base64
+import json
 import keyword
 import logging
 import re
@@ -62,6 +66,17 @@ class SmolAgentsAdapter(ToolAdapter):
 
     """
 
+    def __init__(self, structured_output: bool = False):
+        """Initialize the SmolAgentsAdapter.
+
+        Args:
+            structured_output: If True, enable structured output features including
+                outputSchema support and structured content handling.
+                If False, use the original simple behavior.
+                Defaults to False for backwards compatibility.
+        """
+        self.structured_output = structured_output
+
     def adapt(
         self,
         func: Callable[[dict | None], mcp.types.CallToolResult],
@@ -77,6 +92,41 @@ def adapt(
             A SmolAgents tool.
         """
 
+        # make sure jsonref are resolved
+        input_schema = {
+            k: v
+            for k, v in jsonref.replace_refs(mcp_tool.inputSchema).items()
+            if k != "$defs"
+        }
+
+        # make sure mandatory `description` and `type` are provided for each argument:
+        for k, v in input_schema["properties"].items():
+            if "description" not in v:
+                input_schema["properties"][k]["description"] = "see tool description"
+            if "type" not in v:
+                input_schema["properties"][k]["type"] = "string"
+
+        # Extract and resolve outputSchema if present (only if structured_output=True)
+        output_schema = None
+        if (
+            self.structured_output
+            and hasattr(mcp_tool, "outputSchema")
+            and mcp_tool.outputSchema
+        ):
+            try:
+                output_schema = jsonref.replace_refs(mcp_tool.outputSchema)
+            except Exception as e:
+                logger.warning(
+                    f"Failed to resolve outputSchema for tool {mcp_tool.name}: {e}"
+                )
+                output_schema = (
+                    mcp_tool.outputSchema
+                )  # Use unresolved schema as fallback
+
+        # Always use "object" output_type for maximum flexibility
+        # Smolagents will handle type detection at runtime
+        output_type = "object"
+
         class MCPAdaptTool(smolagents.Tool):
             def __init__(
                 self,
@@ -84,17 +134,21 @@ def __init__(
                 description: str,
                 inputs: dict[str, dict[str, str]],
                 output_type: str,
+                output_schema: dict[str, Any] | None = None,
+                structured_output: bool = False,
             ):
                 self.name = _sanitize_function_name(name)
                 self.description = description
                 self.inputs = inputs
                 self.output_type = output_type
+                self.output_schema = output_schema
+                self.structured_output = structured_output
                 self.is_initialized = True
                 self.skip_forward_signature_validation = True
 
             def forward(
                 self, *args, **kwargs
-            ) -> Union[str, "PILImage", "torch.Tensor"]:
+            ) -> Union[str, "PILImage", "torch.Tensor", Any]:
                 if len(args) > 0:
                     if len(args) == 1 and isinstance(args[0], dict) and not kwargs:
                         mcp_output = func(args[0])
@@ -105,27 +159,57 @@ def forward(
                 else:
                     mcp_output = func(kwargs)
 
-                if len(mcp_output.content) == 0:
+                # Prefer structuredContent when structured features are enabled; it is
+                # checked first because a result may carry it with an empty content list
+                if self.structured_output:
+                    if (
+                        hasattr(mcp_output, "structuredContent")
+                        and mcp_output.structuredContent is not None
+                    ):
+                        return mcp_output.structuredContent
+
+                # Without structured content the content list must carry the result
+                if not mcp_output.content:
                     raise ValueError(f"tool {self.name} returned an empty content")
 
+                # Handle multiple content warning (unified for both modes)
                 if len(mcp_output.content) > 1:
-                    logger.warning(
-                        f"tool {self.name} returned multiple content, using the first one"
+                    warning_msg = (
+                        f"tool {self.name} returned multiple content items but no structuredContent. Using the first content item."
+                        if self.structured_output
+                        else f"tool {self.name} returned multiple content, using the first one"
                     )
-
-                content = mcp_output.content[0]
-
-                if isinstance(content, mcp.types.TextContent):
-                    return content.text
-
-                if isinstance(content, mcp.types.ImageContent):
+                    logger.warning(warning_msg)
+
+                # Get the first content item
+                content_item = mcp_output.content[0]
+
+                # Handle different content types
+                if isinstance(content_item, mcp.types.TextContent):
+                    text_content = content_item.text
+
+                    # Always try to parse JSON if structured features are enabled and structuredContent is absent
+                    if self.structured_output and text_content:
+                        try:
+                            parsed_data = json.loads(text_content)
+                            return parsed_data
+                        except json.JSONDecodeError:
+                            logger.warning(
+                                f"tool {self.name} expected structured output but got unparseable text: {text_content[:100]}..."
+                            )
+                            # Fall through to return text as-is for backwards compatibility
+
+                    # Return simple text content (works for both modes)
+                    return text_content
+
+                elif isinstance(content_item, mcp.types.ImageContent):
                     from PIL import Image
 
-                    image_data = base64.b64decode(content.data)
+                    image_data = base64.b64decode(content_item.data)
                     image = Image.open(BytesIO(image_data))
                     return image
 
-                if isinstance(content, mcp.types.AudioContent):
+                elif isinstance(content_item, mcp.types.AudioContent):
                     if not _is_package_available("torchaudio"):
                         raise ValueError(
                             "Audio content requires the torchaudio package to be installed. "
@@ -134,34 +218,23 @@ def forward(
                     else:
                         import torchaudio  # type: ignore
 
-                    audio_data = base64.b64decode(content.data)
+                    audio_data = base64.b64decode(content_item.data)
                     audio_io = BytesIO(audio_data)
                     audio_tensor, _ = torchaudio.load(audio_io)
                     return audio_tensor
 
-                raise ValueError(
-                    f"tool {self.name} returned an unsupported content type: {type(content)}"
-                )
-
-        # make sure jsonref are resolved
-        input_schema = {
-            k: v
-            for k, v in jsonref.replace_refs(mcp_tool.inputSchema).items()
-            if k != "$defs"
-        }
-
-        # make sure mandatory `description` and `type` is provided for each arguments:
-        for k, v in input_schema["properties"].items():
-            if "description" not in v:
-                input_schema["properties"][k]["description"] = "see tool description"
-            if "type" not in v:
-                input_schema["properties"][k]["type"] = "string"
+                else:
+                    raise ValueError(
+                        f"tool {self.name} returned an unsupported content type: {type(content_item)}"
+                    )
 
         tool = MCPAdaptTool(
             name=mcp_tool.name,
             description=mcp_tool.description or "",
             inputs=input_schema["properties"],
-            output_type="object",
+            output_type=output_type,
+            output_schema=output_schema,
+            structured_output=self.structured_output,
         )
 
         return tool
diff --git a/tests/test_smolagents_adapter.py b/tests/test_smolagents_adapter.py
index d73caa8..c1014ec 100644
--- a/tests/test_smolagents_adapter.py
+++ b/tests/test_smolagents_adapter.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 from textwrap import dedent
+import logging
 
 import pytest
 from mcp import StdioServerParameters
@@ -20,7 +21,7 @@ def echo_server_script():
         def echo_tool(text: str) -> str:
             """Echo the input text"""
             return f"Echo: {text}"
-        
+
         mcp.run()
         '''
     )
@@ -70,7 +71,7 @@ def echo_tool_union_none(text: str | None) -> str:
             if text is None:
                 return "No input provided"
             return f"Echo: {text}"
-        
+
         mcp.run()
         '''
     )
@@ -149,7 +150,7 @@ def test_tool_name_with_dashes():
         def echo_tool(text: str) -> str:
             """Echo the input text"""
             return f"Echo: {text}"
-        
+
         mcp.run()
         '''
     )
@@ -175,7 +176,7 @@ def test_tool_name_with_keyword():
         def echo_tool(text: str) -> str:
             """Echo the input text"""
             return f"Echo: {text}"
-        
+
         mcp.run()
         '''
     )
@@ -241,7 +242,7 @@ def test_audio() -> AudioContent:
             path = os.path.join("{shared_datadir}", "white_noise.wav")
             with open(path, "rb") as f:
                 wav_bytes = f.read()
-        
+
             return AudioContent(type="audio", data=base64.b64encode(wav_bytes).decode(), mimeType="audio/wav")
 
         mcp.run()
@@ -259,3 +260,120 @@ def test_audio() -> AudioContent:
         assert tools[0].name == "test_audio"
         audio_content = tools[0]()
         assert isinstance(audio_content, Tensor)
+
+
+def test_structured_output_types():
+    """Test that structured output returns correct types for different return annotations."""
+    server_script = dedent(
+        """
+        from mcp.server.fastmcp import FastMCP
+        from typing import Any
+
+        mcp = FastMCP("Types Server")
+
+        @mcp.tool()
+        def dict_tool() -> dict[str, Any]:
+            '''Returns a dictionary'''
+            return {"weather": "sunny", "temperature": 70}
+
+        @mcp.tool()
+        def list_tool() -> list[str]:
+            '''Returns a list'''
+            return ["London", "Paris", "Tokyo"]
+
+        @mcp.tool()
+        def string_tool() -> str:
+            '''Returns a string'''
+            return "Hello world"
+
+        mcp.run()
+        """
+    )
+
+    with MCPAdapt(
+        StdioServerParameters(
+            command="uv", args=["run", "python", "-c", server_script]
+        ),
+        SmolAgentsAdapter(structured_output=True),
+    ) as tools:
+        dict_tool, list_tool, string_tool = tools
+
+        # Dict tool: should return dict directly with schema
+        assert dict_tool.output_type == "object"
+        assert dict_tool.output_schema is not None
+        dict_result = dict_tool()
+        assert isinstance(dict_result, dict)
+        assert dict_result["weather"] == "sunny"
+        assert dict_result["temperature"] == 70
+
+        # List tool: should be wrapped in {"result": list} with schema
+        assert list_tool.output_type == "object"
+        assert list_tool.output_schema is not None
+        list_result = list_tool()
+        assert isinstance(list_result, dict)
+        assert "result" in list_result
+        assert set(list_result["result"]) == {"London", "Paris", "Tokyo"}
+
+        # String tool: should be wrapped in {"result": string} with schema
+        assert string_tool.output_type == "object"
+        assert string_tool.output_schema is not None
+        string_result = string_tool()
+        assert isinstance(string_result, dict)
+        assert "result" in string_result
+        assert string_result["result"] == "Hello world"
+
+
+def test_structured_output_warning(caplog):
+    """Test that warning is logged when tool returns unparseable JSON for structured output."""
+    server_script = dedent(
+        '''
+        from mcp.server.fastmcp import FastMCP
+        from typing import Any
+
+        mcp = FastMCP("Invalid Server")
+
+        @mcp.tool()
+        def invalid_tool() -> dict[str, Any]:
+            """Tool that returns invalid JSON when dict is expected."""
+            return "not valid json"  # type: ignore
+
+        mcp.run()
+        '''
+    )
+
+    with MCPAdapt(
+        StdioServerParameters(
+            command="uv", args=["run", "python", "-c", server_script]
+        ),
+        SmolAgentsAdapter(structured_output=True),
+    ) as tools:
+        tool = tools[0]
+
+        # Tool should still work but return error string
+        result = tool()
+        assert isinstance(result, str)
+        assert "error" in result.lower()
+
+        # Warning should be logged about unparseable JSON
+        assert any(
+            r.levelno == logging.WARNING and "unparseable" in r.message.lower()
+            for r in caplog.records
+        )
+
+
+def test_structured_content_with_empty_content():
+    """Test that structuredContent is returned even when the content list is empty."""
+    from mcp.types import CallToolResult, Tool
+
+    mcp_tool = Tool(
+        name="structured_only",
+        description="Returns structured content without any content item",
+        inputSchema={"type": "object", "properties": {}},
+    )
+
+    def call_tool(arguments: dict | None) -> CallToolResult:
+        return CallToolResult(content=[], structuredContent={"ram": "16GB"})
+
+    tool = SmolAgentsAdapter(structured_output=True).adapt(call_tool, mcp_tool)
+
+    assert tool() == {"ram": "16GB"}
diff --git a/uv.lock b/uv.lock
index 374334f..0db43d4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2542,7 +2542,7 @@ requires-dist = [
     { name = "langgraph", marker = "extra == 'langchain'", specifier = ">=0.2.62" },
     { name = "langgraph", marker = "extra == 'test'", specifier = ">=0.2.62" },
     { name = "llama-index", marker = "extra == 'llamaindex'", specifier = ">=0.12.14" },
-    { name = "mcp", specifier = ">=1.9.4" },
+    { name = "mcp", specifier = ">=1.10.1" },
     { name = "pydantic", specifier = ">=2.10.6" },
     { name = "pytest", marker = "extra == 'test'", specifier = ">=8.3.4" },
     { name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=0.25.2" },