From bf363bbbbc1fcb460044a857e8e9ffaabc1255c4 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Sep 2025 10:58:19 +0200 Subject: [PATCH 01/26] Add parse for response format --- haystack/components/generators/chat/openai.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index f1af6e65c6..ec8e46aa9a 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -138,6 +138,7 @@ def __init__( # pylint: disable=too-many-positional-arguments Bigger values mean the model will be less likely to repeat the same token in the text. - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens, and the values are the bias to add to that token. + - `response_format`: A pydantic model to parse the response into. :param timeout: Timeout for OpenAI client calls. If not set, it defaults to either the `OPENAI_TIMEOUT` environment variable, or 30 seconds. @@ -153,6 +154,7 @@ def __init__( # pylint: disable=too-many-positional-arguments :param http_client_kwargs: A dictionary of keyword arguments to configure a custom `httpx.Client`or `httpx.AsyncClient`. For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client). + """ self.api_key = api_key self.model = model @@ -272,6 +274,7 @@ def run( streaming_callback = select_streaming_callback( init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=False ) + chat_completion: Union[Stream[ChatCompletionChunk], ChatCompletion] = None api_args = self._prepare_api_call( messages=messages, @@ -280,9 +283,13 @@ def run( tools=tools, tools_strict=tools_strict, ) - chat_completion: Union[Stream[ChatCompletionChunk], ChatCompletion] = self.client.chat.completions.create( - **api_args - ) + response_format = self.generation_kwargs.get("response_format", None) + if response_format: + api_args["response_format"] = response_format + api_args.pop("stream") + chat_completion = self.client.chat.completions.parse(**api_args) + else: + chat_completion = self.client.chat.completions.create(**api_args) if streaming_callback is not None: completions = self._handle_stream_response( From df844b4aa46c922b7dadd750b30dd1ef42c22c11 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Sep 2025 11:10:35 +0200 Subject: [PATCH 02/26] Update response_format --- haystack/components/generators/chat/openai.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index ec8e46aa9a..e05ee2e56a 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -167,6 +167,7 @@ def __init__( # pylint: disable=too-many-positional-arguments self.tools = tools # Store tools as-is, whether it's a list or a Toolset self.tools_strict = tools_strict self.http_client_kwargs = http_client_kwargs + self.response_format = generation_kwargs.get("response_format", None) # Check for duplicate tool names _check_duplicate_tool_names(list(self.tools or [])) @@ -283,10 +284,7 @@ def run( tools=tools, tools_strict=tools_strict, ) - response_format = self.generation_kwargs.get("response_format", None) - if response_format: - api_args["response_format"] = response_format - api_args.pop("stream") + if self.response_format: chat_completion = 
self.client.chat.completions.parse(**api_args) else: chat_completion = self.client.chat.completions.create(**api_args) @@ -423,6 +421,18 @@ def _prepare_api_call( # noqa: PLR0913 is_streaming = streaming_callback is not None num_responses = generation_kwargs.pop("n", 1) + + if self.response_format: + if is_streaming: + raise ValueError("Cannot stream responses with response_format, please choose one.") + return { + "model": self.model, + "messages": openai_formatted_messages, + "n": num_responses, + "response_format": self.response_format, + **openai_tools, + **generation_kwargs, + } if is_streaming and num_responses > 1: raise ValueError("Cannot stream multiple responses, please set n=1.") From cb5f2dc666e24a5f53676e0d8a819ad5be13e1bc Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Sep 2025 14:01:56 +0200 Subject: [PATCH 03/26] Add tests --- haystack/components/generators/chat/openai.py | 33 ++++++++++++------- .../components/generators/chat/test_openai.py | 26 ++++++++++++++- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index e05ee2e56a..079dc64016 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -13,6 +13,8 @@ ChatCompletionChunk, ChatCompletionMessage, ChatCompletionMessageCustomToolCall, + ParsedChatCompletion, + ParsedChatCompletionMessage, ) from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice @@ -138,7 +140,8 @@ def __init__( # pylint: disable=too-many-positional-arguments Bigger values mean the model will be less likely to repeat the same token in the text. - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens, and the values are the bias to add to that token. - - `response_format`: A pydantic model to parse the response into. + - `response_format`: A json schema or pydantic model to parse the response into. + For more information, see the [OpenAI documentation](https://platform.openai.com/docs/guides/structured-outputs). :param timeout: Timeout for OpenAI client calls. If not set, it defaults to either the `OPENAI_TIMEOUT` environment variable, or 30 seconds. 
@@ -167,7 +170,7 @@ def __init__( # pylint: disable=too-many-positional-arguments self.tools = tools # Store tools as-is, whether it's a list or a Toolset self.tools_strict = tools_strict self.http_client_kwargs = http_client_kwargs - self.response_format = generation_kwargs.get("response_format", None) + self._response_format = generation_kwargs.get("response_format", None) if generation_kwargs else None # Check for duplicate tool names _check_duplicate_tool_names(list(self.tools or [])) @@ -275,7 +278,7 @@ def run( streaming_callback = select_streaming_callback( init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=False ) - chat_completion: Union[Stream[ChatCompletionChunk], ChatCompletion] = None + chat_completion: Union[Stream[ChatCompletionChunk], ChatCompletion, ParsedChatCompletion] api_args = self._prepare_api_call( messages=messages, @@ -284,10 +287,11 @@ def run( tools=tools, tools_strict=tools_strict, ) - if self.response_format: + if self._response_format: chat_completion = self.client.chat.completions.parse(**api_args) else: chat_completion = self.client.chat.completions.create(**api_args) + print(chat_completion) if streaming_callback is not None: completions = self._handle_stream_response( @@ -351,6 +355,7 @@ async def run_async( streaming_callback = select_streaming_callback( init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=True ) + chat_completion: Union[AsyncStream[ChatCompletionChunk], ChatCompletion, ParsedChatCompletion] if len(messages) == 0: return {"replies": []} @@ -363,9 +368,10 @@ async def run_async( tools_strict=tools_strict, ) - chat_completion: Union[ - AsyncStream[ChatCompletionChunk], ChatCompletion - ] = await self.async_client.chat.completions.create(**api_args) + if self._response_format: + chat_completion = await self.async_client.chat.completions.parse(**api_args) + else: + chat_completion = await self.async_client.chat.completions.create(**api_args) if streaming_callback is not None: completions = await self._handle_async_stream_response( @@ -422,17 +428,18 @@ def _prepare_api_call( # noqa: PLR0913 is_streaming = streaming_callback is not None num_responses = generation_kwargs.pop("n", 1) - if self.response_format: + if self._response_format: if is_streaming: - raise ValueError("Cannot stream responses with response_format, please choose one.") + raise ValueError("OpenAI cannot stream responses with `response_format`, please choose one.") return { "model": self.model, "messages": openai_formatted_messages, "n": num_responses, - "response_format": self.response_format, + "response_format": self._response_format, **openai_tools, **generation_kwargs, } + if is_streaming and num_responses > 1: raise ValueError("Cannot stream multiple responses, please set n=1.") @@ -488,7 +495,9 @@ def _check_finish_reason(meta: dict[str, Any]) -> None: ) -def _convert_chat_completion_to_chat_message(completion: ChatCompletion, choice: Choice) -> ChatMessage: +def _convert_chat_completion_to_chat_message( + completion: Union[ChatCompletion, ParsedChatCompletion], choice: Choice +) -> ChatMessage: """ Converts the non-streaming response from the OpenAI API to a ChatMessage. @@ -496,7 +505,7 @@ def _convert_chat_completion_to_chat_message(completion: ChatCompletion, choice: :param choice: The choice returned by the OpenAI API. :return: The ChatMessage. 
""" - message: ChatCompletionMessage = choice.message + message: Union[ChatCompletionMessage, ParsedChatCompletionMessage] = choice.message text = message.content tool_calls = [] if message.tool_calls: diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index ad4b9e1c3e..0386ba825d 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -import base64 +import json import logging import os from datetime import datetime @@ -22,6 +22,7 @@ from openai.types.chat.chat_completion_chunk import ChoiceDelta, ChoiceDeltaToolCall, ChoiceDeltaToolCallFunction from openai.types.chat.chat_completion_message_function_tool_call import Function from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage, PromptTokensDetails +from pydantic import BaseModel from haystack import component from haystack.components.generators.chat.openai import ( @@ -617,6 +618,29 @@ def test_live_run(self): assert message.meta["finish_reason"] == "stop" assert message.meta["usage"]["prompt_tokens"] > 0 + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_response_format(self): + class CalendarEvent(BaseModel): + name: str + date: str + location: str + + chat_messages = [ChatMessage.from_user("Describe the 20th Nobel Peace Prize.")] + component = OpenAIChatGenerator(generation_kwargs={"response_format": CalendarEvent}) + results = component.run(chat_messages) + assert len(results["replies"]) == 1 + message: ChatMessage = results["replies"][0] + msg = json.loads(message.text) + assert "20th Nobel Peace Prize" in msg["name"] + assert isinstance(msg["date"], str) + assert isinstance(msg["location"], str) + + assert message.meta["finish_reason"] == "stop" + def test_run_with_wrong_model(self): mock_client = MagicMock() mock_client.chat.completions.create.side_effect = OpenAIError("Invalid model name") From f54826d9e33bcf08fc541fe4e3b79d5201ca40f0 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Sep 2025 14:53:27 +0200 Subject: [PATCH 04/26] Add release notes --- haystack/components/generators/chat/openai.py | 8 +++++--- .../add-openai-structured-outputs-906be78a984a1079.yaml | 4 ++++ 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 079dc64016..a1d77267b4 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -140,8 +140,11 @@ def __init__( # pylint: disable=too-many-positional-arguments Bigger values mean the model will be less likely to repeat the same token in the text. - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens, and the values are the bias to add to that token. - - `response_format`: A json schema or pydantic model to parse the response into. - For more information, see the [OpenAI documentation](https://platform.openai.com/docs/guides/structured-outputs). + - `response_format`: A JSON schema or Pydantic model that enforces the structure of the model's response. 
+ If provided, the output will always be validated against this + format (unless the model returns a tool call). + For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). + Note: This parameter is only supported for latest models starting with GPT-4o. :param timeout: Timeout for OpenAI client calls. If not set, it defaults to either the `OPENAI_TIMEOUT` environment variable, or 30 seconds. @@ -291,7 +294,6 @@ def run( chat_completion = self.client.chat.completions.parse(**api_args) else: chat_completion = self.client.chat.completions.create(**api_args) - print(chat_completion) if streaming_callback is not None: completions = self._handle_stream_response( diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml new file mode 100644 index 0000000000..22989820f1 --- /dev/null +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -0,0 +1,4 @@ +--- +features: + - OpenAIChatGenerator now supports structured outputs by passing a `response_format` parameter. + - The `response_format` parameter can be pydantic model or JSON schema. From 4bc8b650835391436d050093a13f1e43bf3f2a07 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 2 Sep 2025 15:24:58 +0200 Subject: [PATCH 05/26] Update checks --- haystack/components/generators/chat/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index a1d77267b4..43936bdba8 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -173,7 +173,6 @@ def __init__( # pylint: disable=too-many-positional-arguments self.tools = tools # Store tools as-is, whether it's a list or a Toolset self.tools_strict = tools_strict self.http_client_kwargs = http_client_kwargs - self._response_format = generation_kwargs.get("response_format", None) if generation_kwargs else None # Check for duplicate tool names _check_duplicate_tool_names(list(self.tools or [])) @@ -406,6 +405,7 @@ def _prepare_api_call( # noqa: PLR0913 ) -> dict[str, Any]: # update generation kwargs by merging with the generation kwargs passed to the run method generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})} + self._response_format = generation_kwargs.get("response_format") if generation_kwargs else None # adapt ChatMessage(s) to the format expected by the OpenAI API openai_formatted_messages = [message.to_openai_dict_format() for message in messages] From 1b5ed82a420a9725fbadb0d5eefc3f3d2f4b3abc Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Wed, 3 Sep 2025 16:24:45 +0200 Subject: [PATCH 06/26] remove instance var --- haystack/components/generators/chat/azure.py | 5 +++++ haystack/components/generators/chat/openai.py | 11 ++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py index d11168989d..c2103b2122 100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -123,6 +123,11 @@ def __init__( # pylint: disable=too-many-positional-arguments Higher values make the model less likely to repeat the token. - `logit_bias`: Adds a logit bias to specific tokens. The keys of the dictionary are tokens, and the values are the bias to add to that token. 
+ - `response_format`: A JSON schema or Pydantic model that enforces the structure of the model's response. + If provided, the output will always be validated against this + format (unless the model returns a tool call). + For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). + Note: This parameter is only supported for latest models starting with GPT-4o. :param default_headers: Default headers to use for the AzureOpenAI client. :param tools: A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 43936bdba8..8d74ef1caf 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -208,6 +208,7 @@ def to_dict(self) -> dict[str, Any]: The serialized component as a dictionary. """ callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None + return default_to_dict( self, model=self.model, @@ -289,7 +290,7 @@ def run( tools=tools, tools_strict=tools_strict, ) - if self._response_format: + if "response_format" in api_args.keys(): chat_completion = self.client.chat.completions.parse(**api_args) else: chat_completion = self.client.chat.completions.create(**api_args) @@ -369,7 +370,7 @@ async def run_async( tools_strict=tools_strict, ) - if self._response_format: + if "response_format" in api_args.keys(): chat_completion = await self.async_client.chat.completions.parse(**api_args) else: chat_completion = await self.async_client.chat.completions.create(**api_args) @@ -405,7 +406,7 @@ def _prepare_api_call( # noqa: PLR0913 ) -> dict[str, Any]: # update generation kwargs by merging with the generation kwargs passed to the run method generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})} - self._response_format = generation_kwargs.get("response_format") if generation_kwargs else None + response_format = generation_kwargs.get("response_format") if generation_kwargs else None # adapt ChatMessage(s) to the format expected by the OpenAI API openai_formatted_messages = [message.to_openai_dict_format() for message in messages] @@ -430,14 +431,14 @@ def _prepare_api_call( # noqa: PLR0913 is_streaming = streaming_callback is not None num_responses = generation_kwargs.pop("n", 1) - if self._response_format: + if response_format: if is_streaming: raise ValueError("OpenAI cannot stream responses with `response_format`, please choose one.") return { "model": self.model, "messages": openai_formatted_messages, "n": num_responses, - "response_format": self._response_format, + "response_format": response_format, **openai_tools, **generation_kwargs, } From 02d3d59c142b1228b2446e49bd6da4fd1c7e5b3f Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Thu, 4 Sep 2025 10:45:54 +0200 Subject: [PATCH 07/26] Add tests for azure --- haystack/components/generators/chat/openai.py | 9 ++++++- ...i-structured-outputs-906be78a984a1079.yaml | 2 +- test/components/generators/chat/test_azure.py | 27 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 8d74ef1caf..7ec74b8378 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -8,6 +8,7 @@ from typing import Any, Optional, Union from openai import AsyncOpenAI, 
AsyncStream, OpenAI, Stream +from openai.lib._pydantic import to_strict_json_schema from openai.types.chat import ( ChatCompletion, ChatCompletionChunk, @@ -18,6 +19,7 @@ ) from openai.types.chat.chat_completion import Choice from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice +from pydantic import BaseModel from haystack import component, default_from_dict, default_to_dict, logging from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message @@ -208,6 +210,9 @@ def to_dict(self) -> dict[str, Any]: The serialized component as a dictionary. """ callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None + response_format = self.generation_kwargs.get("response_format") + if response_format and issubclass(response_format, BaseModel): + self.generation_kwargs["response_format"] = to_strict_json_schema(response_format) return default_to_dict( self, @@ -433,7 +438,9 @@ def _prepare_api_call( # noqa: PLR0913 if response_format: if is_streaming: - raise ValueError("OpenAI cannot stream responses with `response_format`, please choose one.") + raise ValueError( + "OpenAI does not support `streaming_callback` with `response_format`, please choose one." + ) return { "model": self.model, "messages": openai_formatted_messages, diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml index 22989820f1..ba95acf85e 100644 --- a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -1,4 +1,4 @@ --- features: - OpenAIChatGenerator now supports structured outputs by passing a `response_format` parameter. - - The `response_format` parameter can be pydantic model or JSON schema. + The `response_format` parameter can be a pydantic model or a JSON schema. 
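A minimal usage sketch of the feature this release note describes, assuming `haystack-ai` with these patches applied and `OPENAI_API_KEY` exported; the schema class, model name, and prompt are illustrative and not part of the diff:

```python
from pydantic import BaseModel

from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage


# Illustrative Pydantic schema; any BaseModel subclass can be passed
# as `response_format` inside `generation_kwargs`.
class CapitalInfo(BaseModel):
    city: str
    country: str


generator = OpenAIChatGenerator(
    model="gpt-4o-mini",  # assumes a model that supports structured outputs
    generation_kwargs={"response_format": CapitalInfo},
)
result = generator.run([ChatMessage.from_user("What is the capital of France?")])
# The reply text is JSON validated against CapitalInfo,
# e.g. '{"city": "Paris", "country": "France"}'
print(result["replies"][0].text)
```
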
diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 56d1a1740e..8c03cf7476 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -2,11 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 +import json import os from typing import Any, Optional import pytest from openai import OpenAIError +from pydantic import BaseModel from haystack import Pipeline, component from haystack.components.generators.chat import AzureOpenAIChatGenerator @@ -331,6 +333,31 @@ def test_live_run_with_tools(self, tools): assert tool_call.arguments == {"city": "Paris"} assert message.meta["finish_reason"] == "tool_calls" + @pytest.mark.skipif( + not os.environ.get("AZURE_OPENAI_API_KEY", None), + reason="Export an env var called AZURE_OPENAI_API_KEY containing the Azure OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_response_format(self): + class CalendarEvent(BaseModel): + name: str + date: str + location: str + + chat_messages = [ChatMessage.from_user("Describe the 20th Nobel Peace Prize.")] + component = AzureOpenAIChatGenerator( + api_version="2024-08-01-preview", generation_kwargs={"response_format": CalendarEvent} + ) + results = component.run(chat_messages) + assert len(results["replies"]) == 1 + message: ChatMessage = results["replies"][0] + msg = json.loads(message.text) + assert "20th Nobel Peace Prize" in msg["name"] + assert isinstance(msg["date"], str) + assert isinstance(msg["location"], str) + + assert message.meta["finish_reason"] == "stop" + def test_to_dict_with_toolset(self, tools, monkeypatch): """Test that the AzureOpenAIChatGenerator can be serialized to a dictionary with a Toolset.""" monkeypatch.setenv("AZURE_OPENAI_API_KEY", "test-api-key") From 060c4f006ee8411ec09f9978f36df5b1735a88af Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Thu, 4 Sep 2025 11:31:23 +0200 Subject: [PATCH 08/26] Add schema test --- ...i-structured-outputs-906be78a984a1079.yaml | 5 +- .../components/generators/chat/test_openai.py | 65 +++++++++++++++++-- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml index ba95acf85e..28279fda7b 100644 --- a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -1,4 +1,5 @@ --- features: - - OpenAIChatGenerator now supports structured outputs by passing a `response_format` parameter. - The `response_format` parameter can be a pydantic model or a JSON schema. + - | + `OpenAIChatGenerator` and `AzureOpenAIChatGenerator` now support structured outputs by passing a `response_format` parameter. + The `response_format` parameter can be a pydantic model or a JSON schema. 
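For the JSON-schema form of the same parameter, a sketch that mirrors the integration test added below; the `json_schema` envelope follows the OpenAI structured-outputs format, and the schema name and prompt are illustrative:

```python
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage

# Hand-written schema equivalent to a small Pydantic model; "strict": True
# asks OpenAI to enforce the schema exactly.
response_schema = {
    "type": "json_schema",
    "json_schema": {
        "name": "CapitalCity",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "country": {"type": "string"},
            },
            "required": ["city", "country"],
            "additionalProperties": False,
        },
    },
}

generator = OpenAIChatGenerator(generation_kwargs={"response_format": response_schema})
result = generator.run([ChatMessage.from_user("What's the capital of France?")])
print(result["replies"][0].text)  # e.g. '{"city": "Paris", "country": "France"}'
```
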
diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index 0386ba825d..73218bba2d 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -37,6 +37,12 @@ from haystack.utils.auth import Secret +class CalendarEvent(BaseModel): + name: str + date: str + location: str + + @pytest.fixture def chat_messages(): return [ @@ -227,7 +233,7 @@ def test_to_dict_with_parameters(self, monkeypatch): model="gpt-4o-mini", streaming_callback=print_streaming_chunk, api_base_url="test-base-url", - generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + generation_kwargs={"max_tokens": 10, "some_test_param": "test-params", "response_format": CalendarEvent}, tools=[tool], tools_strict=True, max_retries=10, @@ -235,6 +241,7 @@ def test_to_dict_with_parameters(self, monkeypatch): http_client_kwargs={"proxy": "http://example.com:8080", "verify": False}, ) data = component.to_dict() + print(data) assert data == { "type": "haystack.components.generators.chat.openai.OpenAIChatGenerator", @@ -246,7 +253,21 @@ def test_to_dict_with_parameters(self, monkeypatch): "max_retries": 10, "timeout": 100.0, "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", - "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "generation_kwargs": { + "max_tokens": 10, + "some_test_param": "test-params", + "response_format": { + "properties": { + "name": {"title": "Name", "type": "string"}, + "date": {"title": "Date", "type": "string"}, + "location": {"title": "Location", "type": "string"}, + }, + "required": ["name", "date", "location"], + "title": "CalendarEvent", + "type": "object", + "additionalProperties": False, + }, + }, "tools": [ { "type": "haystack.tools.tool.Tool", @@ -624,11 +645,6 @@ def test_live_run(self): ) @pytest.mark.integration def test_live_run_with_response_format(self): - class CalendarEvent(BaseModel): - name: str - date: str - location: str - chat_messages = [ChatMessage.from_user("Describe the 20th Nobel Peace Prize.")] component = OpenAIChatGenerator(generation_kwargs={"response_format": CalendarEvent}) results = component.run(chat_messages) @@ -641,6 +657,41 @@ class CalendarEvent(BaseModel): assert message.meta["finish_reason"] == "stop" + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_response_format_json_schema(self): + response_schema = { + "type": "json_schema", + "json_schema": { + "name": "CapitalCity", + "strict": True, + "schema": { + "title": "CapitalCity", + "type": "object", + "properties": { + "city": {"title": "City", "type": "string"}, + "country": {"title": "Country", "type": "string"}, + }, + "required": ["city", "country"], + "additionalProperties": False, + }, + }, + } + + chat_messages = [ChatMessage.from_user("What's the capital of France?")] + component = OpenAIChatGenerator(generation_kwargs={"response_format": response_schema}) + results = component.run(chat_messages) + assert len(results["replies"]) == 1 + message: ChatMessage = results["replies"][0] + msg = json.loads(message.text) + assert "Paris" in msg["city"] + assert isinstance(msg["country"], str) + assert "France" in msg["country"] + assert message.meta["finish_reason"] == "stop" + def test_run_with_wrong_model(self): mock_client = MagicMock() 
mock_client.chat.completions.create.side_effect = OpenAIError("Invalid model name")

From c2d41fe290532a8ef3445420dc49d41e1a8cb7bc Mon Sep 17 00:00:00 2001
From: Amna Mubashar
Date: Thu, 4 Sep 2025 11:34:27 +0200
Subject: [PATCH 09/26] Add comments

---
 haystack/components/generators/chat/openai.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 7ec74b8378..449aa861d3 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -211,6 +211,9 @@ def to_dict(self) -> dict[str, Any]:
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         response_format = self.generation_kwargs.get("response_format")
+
+        # If the response format is a Pydantic model, it's converted to OpenAI's JSON schema format
+        # If it's already a JSON schema, it's left as is
         if response_format and issubclass(response_format, BaseModel):
             self.generation_kwargs["response_format"] = to_strict_json_schema(response_format)

From 4c0069c8588aa719017b42112c0685d8ec87bfce Mon Sep 17 00:00:00 2001
From: Amna Mubashar
Date: Fri, 5 Sep 2025 16:07:08 +0200
Subject: [PATCH 10/26] Add streaming support

---
 haystack/components/generators/chat/openai.py | 24 +++++++-------
 .../components/generators/chat/test_openai.py | 32 ++++++++++++-------
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 449aa861d3..2c9772b79c 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -146,7 +146,10 @@ def __init__(  # pylint: disable=too-many-positional-arguments
             If provided, the output will always be validated against this
             format (unless the model returns a tool call).
             For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
-            Note: This parameter is only supported for latest models starting with GPT-4o.
+            Note:
+            - This parameter is only supported for latest models starting with GPT-4o.
+            - For structured outputs with streaming,
+            the `response_format` must be a JSON schema and not a Pydantic model.
         :param timeout:
             Timeout for OpenAI client calls. If not set, it defaults to either the
             `OPENAI_TIMEOUT` environment variable, or 30 seconds.
@@ -298,10 +301,10 @@ def run( tools=tools, tools_strict=tools_strict, ) - if "response_format" in api_args.keys(): - chat_completion = self.client.chat.completions.parse(**api_args) - else: + if "stream" in api_args.keys(): chat_completion = self.client.chat.completions.create(**api_args) + else: + chat_completion = self.client.chat.completions.parse(**api_args) if streaming_callback is not None: completions = self._handle_stream_response( @@ -378,10 +381,10 @@ async def run_async( tools_strict=tools_strict, ) - if "response_format" in api_args.keys(): - chat_completion = await self.async_client.chat.completions.parse(**api_args) - else: + if "stream" in api_args.keys(): chat_completion = await self.async_client.chat.completions.create(**api_args) + else: + chat_completion = await self.async_client.chat.completions.parse(**api_args) if streaming_callback is not None: completions = await self._handle_async_stream_response( @@ -439,11 +442,7 @@ def _prepare_api_call( # noqa: PLR0913 is_streaming = streaming_callback is not None num_responses = generation_kwargs.pop("n", 1) - if response_format: - if is_streaming: - raise ValueError( - "OpenAI does not support `streaming_callback` with `response_format`, please choose one." - ) + if response_format and not is_streaming: return { "model": self.model, "messages": openai_formatted_messages, @@ -461,6 +460,7 @@ def _prepare_api_call( # noqa: PLR0913 "messages": openai_formatted_messages, "stream": streaming_callback is not None, "n": num_responses, + "response_format": response_format, **openai_tools, **generation_kwargs, } diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index 73218bba2d..a281d399fd 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -38,9 +38,14 @@ class CalendarEvent(BaseModel): - name: str - date: str - location: str + event_name: str + event_date: str + event_location: str + + +@pytest.fixture +def calendar_event_model(): + return CalendarEvent @pytest.fixture @@ -224,7 +229,7 @@ def test_to_dict_default(self, monkeypatch): }, } - def test_to_dict_with_parameters(self, monkeypatch): + def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): tool = Tool(name="name", description="description", parameters={"x": {"type": "string"}}, function=print) monkeypatch.setenv("ENV_VAR", "test-api-key") @@ -233,7 +238,11 @@ def test_to_dict_with_parameters(self, monkeypatch): model="gpt-4o-mini", streaming_callback=print_streaming_chunk, api_base_url="test-base-url", - generation_kwargs={"max_tokens": 10, "some_test_param": "test-params", "response_format": CalendarEvent}, + generation_kwargs={ + "max_tokens": 10, + "some_test_param": "test-params", + "response_format": calendar_event_model, + }, tools=[tool], tools_strict=True, max_retries=10, @@ -241,7 +250,6 @@ def test_to_dict_with_parameters(self, monkeypatch): http_client_kwargs={"proxy": "http://example.com:8080", "verify": False}, ) data = component.to_dict() - print(data) assert data == { "type": "haystack.components.generators.chat.openai.OpenAIChatGenerator", @@ -258,11 +266,11 @@ def test_to_dict_with_parameters(self, monkeypatch): "some_test_param": "test-params", "response_format": { "properties": { - "name": {"title": "Name", "type": "string"}, - "date": {"title": "Date", "type": "string"}, - "location": {"title": "Location", "type": "string"}, + "event_name": {"title": "Event Name", "type": "string"}, + "event_date": {"title": 
"Event Date", "type": "string"}, + "event_location": {"title": "Event Location", "type": "string"}, }, - "required": ["name", "date", "location"], + "required": ["event_name", "event_date", "event_location"], "title": "CalendarEvent", "type": "object", "additionalProperties": False, @@ -644,9 +652,9 @@ def test_live_run(self): reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", ) @pytest.mark.integration - def test_live_run_with_response_format(self): + def test_live_run_with_response_format(self, calendar_event_model): chat_messages = [ChatMessage.from_user("Describe the 20th Nobel Peace Prize.")] - component = OpenAIChatGenerator(generation_kwargs={"response_format": CalendarEvent}) + component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model}) results = component.run(chat_messages) assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] From b123b9aa4d043074b21095f04dc8baacc748a106 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Fri, 5 Sep 2025 16:15:54 +0200 Subject: [PATCH 11/26] PR comments --- ...i-structured-outputs-906be78a984a1079.yaml | 22 +++++++++++++++++-- test/components/generators/chat/test_azure.py | 14 ++++++------ .../components/generators/chat/test_openai.py | 8 +++---- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml index 28279fda7b..f74a3de264 100644 --- a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -1,5 +1,23 @@ --- features: - | - `OpenAIChatGenerator` and `AzureOpenAIChatGenerator` now support structured outputs by passing a `response_format` parameter. - The `response_format` parameter can be a pydantic model or a JSON schema. + `OpenAIChatGenerator` and `AzureOpenAIChatGenerator` now support structured outputs using `response_format` + parameter that can be passed in generation_kwargs. + The `response_format` parameter can be a Pydantic model or a JSON schema for non-streaming responses. For streaming responses, the `response_format` must be a JSON schema. 
+ Example usage of the `response_format` parameter: + ```python + from pydantic import BaseModel + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.dataclasses import ChatMessage + + class CalendarEvent(BaseModel): + event_name: str + event_date: str + event_location: str + + client = OpenAIChatGenerator(model="gpt-4o-2024-08-06", generation_kwargs={"response_format": CalendarEvent}) + + response = client.run(messages=[ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")])) + print(response) + + ``` diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 8c03cf7476..ca0055e7a7 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -340,11 +340,11 @@ def test_live_run_with_tools(self, tools): @pytest.mark.integration def test_live_run_with_response_format(self): class CalendarEvent(BaseModel): - name: str - date: str - location: str + event_name: str + event_date: str + event_location: str - chat_messages = [ChatMessage.from_user("Describe the 20th Nobel Peace Prize.")] + chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] component = AzureOpenAIChatGenerator( api_version="2024-08-01-preview", generation_kwargs={"response_format": CalendarEvent} ) @@ -352,9 +352,9 @@ class CalendarEvent(BaseModel): assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "20th Nobel Peace Prize" in msg["name"] - assert isinstance(msg["date"], str) - assert isinstance(msg["location"], str) + assert "20th Nobel Peace Prize" in msg["event_name"] + assert isinstance(msg["event_date"], str) + assert isinstance(msg["event_location"], str) assert message.meta["finish_reason"] == "stop" diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index a281d399fd..faa6ab6659 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -653,15 +653,15 @@ def test_live_run(self): ) @pytest.mark.integration def test_live_run_with_response_format(self, calendar_event_model): - chat_messages = [ChatMessage.from_user("Describe the 20th Nobel Peace Prize.")] + chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model}) results = component.run(chat_messages) assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "20th Nobel Peace Prize" in msg["name"] - assert isinstance(msg["date"], str) - assert isinstance(msg["location"], str) + assert "20th Nobel Peace Prize" in msg["event_name"] + assert isinstance(msg["event_date"], str) + assert isinstance(msg["event_location"], str) assert message.meta["finish_reason"] == "stop" From cde0714b5b00dcb4f861d31cf0773b810ee1eb99 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Sep 2025 09:27:58 +0200 Subject: [PATCH 12/26] PR comments --- haystack/components/generators/chat/azure.py | 5 ++++- haystack/components/generators/chat/openai.py | 12 ++++++++++- ...i-structured-outputs-906be78a984a1079.yaml | 21 ++++++++++++------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py index c2103b2122..3a6069ce8b 
100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -127,7 +127,10 @@ def __init__( # pylint: disable=too-many-positional-arguments If provided, the output will always be validated against this format (unless the model returns a tool call). For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). - Note: This parameter is only supported for latest models starting with GPT-4o. + Note: + - This parameter is only supported for latest models starting with GPT-4o. + - For structured outputs with streaming, + the `response_format` must be a JSON schema and not a Pydantic model. :param default_headers: Default headers to use for the AzureOpenAI client. :param tools: A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 2c9772b79c..9415b12432 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -301,7 +301,8 @@ def run( tools=tools, tools_strict=tools_strict, ) - if "stream" in api_args.keys(): + openai_endpoint = api_args.pop("openai_endpoint") + if openai_endpoint == "create": chat_completion = self.client.chat.completions.create(**api_args) else: chat_completion = self.client.chat.completions.parse(**api_args) @@ -443,11 +444,16 @@ def _prepare_api_call( # noqa: PLR0913 num_responses = generation_kwargs.pop("n", 1) if response_format and not is_streaming: + # for structured outputs without streaming, we use openai's parse endpoint + # Note: `stream` cannot be passed to chat.completions.parse + # we pass a key `openai_endpoint` as a hint to the run method to use the parse endpoint + # this key will be removed before the API call is made return { "model": self.model, "messages": openai_formatted_messages, "n": num_responses, "response_format": response_format, + "openai_endpoint": "parse", **openai_tools, **generation_kwargs, } @@ -455,12 +461,16 @@ def _prepare_api_call( # noqa: PLR0913 if is_streaming and num_responses > 1: raise ValueError("Cannot stream multiple responses, please set n=1.") + # for structured outputs with streaming, we use openai's create endpoint + # we pass a key `openai_endpoint` as a hint to the run method to use the create endpoint + # this key will be removed before the API call is made return { "model": self.model, "messages": openai_formatted_messages, "stream": streaming_callback is not None, "n": num_responses, "response_format": response_format, + "openai_endpoint": "create", **openai_tools, **generation_kwargs, } diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml index f74a3de264..f1cb9d0d66 100644 --- a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -2,7 +2,7 @@ features: - | `OpenAIChatGenerator` and `AzureOpenAIChatGenerator` now support structured outputs using `response_format` - parameter that can be passed in generation_kwargs. + parameter that can be passed in `generation_kwargs`. The `response_format` parameter can be a Pydantic model or a JSON schema for non-streaming responses. For streaming responses, the `response_format` must be a JSON schema. 
Example usage of the `response_format` parameter: ```python @@ -10,14 +10,21 @@ features: from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage - class CalendarEvent(BaseModel): - event_name: str - event_date: str - event_location: str + class NobelPrizeInfo(BaseModel): + recipient_name: str + award_year: int + category: str + achievement_description: str + nationality: str - client = OpenAIChatGenerator(model="gpt-4o-2024-08-06", generation_kwargs={"response_format": CalendarEvent}) + client = OpenAIChatGenerator( + model="gpt-4o-2024-08-06", + generation_kwargs={"response_format": NobelPrizeInfo} + ) - response = client.run(messages=[ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")])) + response = client.run(messages=[ + ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize winner.") + ]) print(response) ``` From 2b4a0ffab477d6f073ac5aa1edc001991d600d6a Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Sep 2025 09:43:42 +0200 Subject: [PATCH 13/26] Add tests --- haystack/components/generators/chat/openai.py | 7 +++-- .../components/generators/chat/test_openai.py | 31 +++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 9415b12432..c0ed893f03 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -304,7 +304,7 @@ def run( openai_endpoint = api_args.pop("openai_endpoint") if openai_endpoint == "create": chat_completion = self.client.chat.completions.create(**api_args) - else: + elif openai_endpoint == "parse": chat_completion = self.client.chat.completions.parse(**api_args) if streaming_callback is not None: @@ -382,9 +382,10 @@ async def run_async( tools_strict=tools_strict, ) - if "stream" in api_args.keys(): + openai_endpoint = api_args.pop("openai_endpoint") + if openai_endpoint == "create": chat_completion = await self.async_client.chat.completions.create(**api_args) - else: + elif openai_endpoint == "parse": chat_completion = await self.async_client.chat.completions.parse(**api_args) if streaming_callback is not None: diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index faa6ab6659..9fadb1845c 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -700,6 +700,37 @@ def test_live_run_with_response_format_json_schema(self): assert "France" in msg["country"] assert message.meta["finish_reason"] == "stop" + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_response_format_and_streaming_pydantic_model(self, calendar_event_model): + chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] + component = OpenAIChatGenerator( + generation_kwargs={"response_format": calendar_event_model}, streaming_callback=print_streaming_chunk + ) + with pytest.raises(OpenAIError): + component.run(chat_messages) + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_response_format_and_streaming(self, 
calendar_event_model):
+        chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
+        component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model})
+        results = component.run(chat_messages)
+        assert len(results["replies"]) == 1
+        message: ChatMessage = results["replies"][0]
+        msg = json.loads(message.text)
+        assert "20th Nobel Peace Prize" in msg["event_name"]
+        assert isinstance(msg["event_date"], str)
+        assert isinstance(msg["event_location"], str)
+
+        assert message.meta["finish_reason"] == "stop"
+
     def test_run_with_wrong_model(self):
         mock_client = MagicMock()
         mock_client.chat.completions.create.side_effect = OpenAIError("Invalid model name")

From 0dbe1fdc2ab229dc7e913b81101422d78eeb6ed5 Mon Sep 17 00:00:00 2001
From: Amna Mubashar
Date: Mon, 8 Sep 2025 09:59:35 +0200
Subject: [PATCH 14/26] Fix tests

---
 test/components/generators/chat/test_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py
index 9fadb1845c..b5876190e4 100644
--- a/test/components/generators/chat/test_openai.py
+++ b/test/components/generators/chat/test_openai.py
@@ -710,7 +710,7 @@ def test_run_with_response_format_and_streaming_pydantic_model(self, calend
         component = OpenAIChatGenerator(
             generation_kwargs={"response_format": calendar_event_model}, streaming_callback=print_streaming_chunk
         )
-        with pytest.raises(OpenAIError):
+        with pytest.raises(TypeError):
             component.run(chat_messages)

From 52401d6a8f1914a088f2be3705151ddff9fa5b6b Mon Sep 17 00:00:00 2001
From: Amna Mubashar
Date: Mon, 8 Sep 2025 13:35:25 +0200
Subject: [PATCH 15/26] Add unit tests

---
 haystack/components/generators/chat/openai.py | 5 +-
 .../components/generators/chat/test_openai.py | 183 ++++++++++++++++--
 2 files changed, 174 insertions(+), 14 deletions(-)

diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index c0ed893f03..6431abe132 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -147,7 +147,9 @@ def __init__(  # pylint: disable=too-many-positional-arguments
             format (unless the model returns a tool call).
             For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
             Note:
-            - This parameter is only supported for latest models starting with GPT-4o.
+            - This parameter accepts Pydantic models and JSON schemas for the latest models starting with GPT-4o.
+            Older models only support a basic version of structured outputs through `{"type": "json_object"}`.
+            For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
             - For structured outputs with streaming,
             the `response_format` must be a JSON schema and not a Pydantic model.
:param timeout: @@ -306,6 +308,7 @@ def run( chat_completion = self.client.chat.completions.create(**api_args) elif openai_endpoint == "parse": chat_completion = self.client.chat.completions.parse(**api_args) + print(chat_completion) if streaming_callback is not None: completions = self._handle_stream_response( diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index b5876190e4..b2065821d8 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -16,6 +16,11 @@ ChatCompletionChunk, ChatCompletionMessage, ChatCompletionMessageFunctionToolCall, + ParsedChatCompletion, + ParsedChatCompletionMessage, + ParsedChoice, + ParsedFunction, + ParsedFunctionToolCall, chat_completion_chunk, ) from openai.types.chat.chat_completion import Choice @@ -104,6 +109,40 @@ def weather_function(city: str) -> dict[str, Any]: return weather_info.get(city, {"weather": "unknown", "temperature": 0, "unit": "celsius"}) +# mock chat completions with structured outputs +@pytest.fixture +def mock_parsed_chat_completion(): + with patch("openai.resources.chat.completions.Completions.parse") as mock_chat_completion_parse: + completion = ParsedChatCompletion[CalendarEvent]( + id="json_foo", + model="gpt-4o-mini-2024-07-18", + object="chat.completion", + choices=[ + ParsedChoice[CalendarEvent]( + finish_reason="stop", + index=0, + message=ParsedChatCompletionMessage[CalendarEvent]( + content='{"event_name":"Team Meeting","event_date":"2024-03-15",' + '"event_location":"Conference Room A"}', + refusal=None, + role="assistant", + annotations=[], + audio=None, + function_call=None, + tool_calls=None, + parsed=CalendarEvent( + event_name="Team Meeting", event_date="2024-03-15", event_location="Conference Room A" + ), + ), + ) + ], + created=1757328264, + usage=CompletionUsage(completion_tokens=29, prompt_tokens=86, total_tokens=115), + ) + mock_chat_completion_parse.return_value = completion + yield mock_chat_completion_parse + + @component class MessageExtractor: @component.output_types(messages=list[str], meta=dict[str, Any]) @@ -431,6 +470,28 @@ def streaming_callback(chunk: StreamingChunk) -> None: assert [isinstance(reply, ChatMessage) for reply in response["replies"]] assert "Hello" in response["replies"][0].text # see openai_mock_chat_completion_chunk + def test_run_with_response_format(self, chat_messages, mock_parsed_chat_completion): + component = OpenAIChatGenerator( + api_key=Secret.from_token("test-api-key"), generation_kwargs={"response_format": CalendarEvent} + ) + response = component.run(chat_messages) + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert [isinstance(reply, ChatMessage) for reply in response["replies"]] + assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion + + def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed_chat_completion): + component = OpenAIChatGenerator(api_key=Secret.from_token("test-api-key")) + response = component.run(chat_messages, generation_kwargs={"response_format": CalendarEvent}) + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert [isinstance(reply, ChatMessage) for reply in response["replies"]] + assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion + 
def test_run_with_wrapped_stream_simulation(self, chat_messages, openai_mock_stream): streaming_callback_called = False @@ -553,6 +614,66 @@ def test_run_with_tools(self, tools): assert message.meta["finish_reason"] == "tool_calls" assert message.meta["usage"]["completion_tokens"] == 40 + def test_run_with_tools_and_response_format(self, tools, mock_parsed_chat_completion): + """ + Test the run method with tools and response format + When tools are used, the function call overrides the schema passed in response_format + """ + with patch("openai.resources.chat.completions.Completions.parse") as mock_chat_completion_parse: + completion = ParsedChatCompletion[CalendarEvent]( + id="foo", + model="gpt-4", + object="chat.completion", + choices=[ + ParsedChoice[CalendarEvent]( + finish_reason="tool_calls", + logprobs=None, + index=0, + message=ParsedChatCompletionMessage[CalendarEvent]( + role="assistant", + tool_calls=[ + ParsedFunctionToolCall( + id="123", + type="function", + function=ParsedFunction(name="weather", arguments='{"city": "Paris"}'), + ) + ], + ), + ) + ], + created=int(datetime.now().timestamp()), + usage=CompletionUsage( + completion_tokens=40, + prompt_tokens=57, + total_tokens=97, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0 + ), + prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0), + ), + ) + mock_chat_completion_parse.return_value = completion + + component = OpenAIChatGenerator( + api_key=Secret.from_token("test-api-key"), tools=tools[:1], tools_strict=True + ) + response_with_format = component.run( + [ChatMessage.from_user("What's the weather like in Paris?")], + generation_kwargs={"response_format": CalendarEvent}, + ) + + assert len(response_with_format["replies"]) == 1 + message_with_format = response_with_format["replies"][0] + assert not message_with_format.texts + assert not message_with_format.text + assert message_with_format.tool_calls + tool_call = message_with_format.tool_call + assert isinstance(tool_call, ToolCall) + assert tool_call.tool_name == "weather" + assert tool_call.arguments == {"city": "Paris"} + assert message_with_format.meta["finish_reason"] == "tool_calls" + assert message_with_format.meta["usage"]["completion_tokens"] == 40 + def test_run_with_tools_streaming(self, mock_chat_completion_chunk_with_tools, tools): streaming_callback_called = False @@ -631,6 +752,16 @@ def test_invalid_tool_call_json(self, tools, caplog): assert message.meta["finish_reason"] == "tool_calls" assert message.meta["usage"]["completion_tokens"] == 47 + def test_run_with_response_format_and_streaming_pydantic_model(self, calendar_event_model): + chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] + component = OpenAIChatGenerator( + api_key=Secret.from_token("test-api-key"), + generation_kwargs={"response_format": calendar_event_model}, + streaming_callback=print_streaming_chunk, + ) + with pytest.raises(TypeError): + component.run(chat_messages) + @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", @@ -700,19 +831,6 @@ def test_live_run_with_response_format_json_schema(self): assert "France" in msg["country"] assert message.meta["finish_reason"] == "stop" - @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the 
OpenAI API key to run this test.", - ) - @pytest.mark.integration - def test_live_run_with_response_format_and_streaming_pydantic_model(self, calendar_event_model): - chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] - component = OpenAIChatGenerator( - generation_kwargs={"response_format": calendar_event_model}, streaming_callback=print_streaming_chunk - ) - with pytest.raises(TypeError): - component.run(chat_messages) - @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", @@ -731,6 +849,45 @@ def test_live_run_with_response_format_and_streaming(self, calendar_event_model) assert message.meta["finish_reason"] == "stop" + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_run_with_response_format_with_json_mode(self): + """Test the basic json mode of structured outputs for older gpt models""" + chat_messages = [ + ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.Answer in json format") + ] + component = OpenAIChatGenerator( + generation_kwargs={"response_format": {"type": "json_object"}}, model="gpt-3.5-turbo" + ) + + response = component.run(chat_messages) + print(response) + assert len(response["replies"]) == 1 + message: ChatMessage = response["replies"][0] + assert message.text + assert message.meta["finish_reason"] == "stop" + assert message.meta["usage"]["prompt_tokens"] > 0 + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_run_with_response_format_and_unsupported_model(self, calendar_event_model): + """Test pydantic model in response format with an unsupported model""" + chat_messages = [ + ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.Answer in json format") + ] + component = OpenAIChatGenerator( + generation_kwargs={"response_format": calendar_event_model}, model="gpt-3.5-turbo" + ) + # for older models, this should raise an OpenAIError + with pytest.raises(OpenAIError): + component.run(chat_messages) + def test_run_with_wrong_model(self): mock_client = MagicMock() mock_client.chat.completions.create.side_effect = OpenAIError("Invalid model name") From 333b9e82ef3c5265c74fa28ec0536e3aebdd5a10 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Sep 2025 14:56:03 +0200 Subject: [PATCH 16/26] Update Azure files --- haystack/components/generators/chat/azure.py | 11 ++++++++- test/components/generators/chat/test_azure.py | 24 ++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py index 3a6069ce8b..d115a72df3 100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -5,7 +5,9 @@ import os from typing import Any, Optional, Union +from openai.lib._pydantic import to_strict_json_schema from openai.lib.azure import AsyncAzureADTokenProvider, AsyncAzureOpenAI, AzureADTokenProvider, AzureOpenAI +from pydantic import BaseModel from haystack import component, default_from_dict, default_to_dict from haystack.components.generators.chat import OpenAIChatGenerator @@ -128,7 +130,9 @@ def __init__( 
# pylint: disable=too-many-positional-arguments format (unless the model returns a tool call). For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). Note: - - This parameter is only supported for latest models starting with GPT-4o. + - This parameter accepts Pydantic models and JSON schemas for latest models starting with GPT-4o. + Older models only support basic version of structured outputs through `{"type": "json_object"}`. + For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode). - For structured outputs with streaming, the `response_format` must be a JSON schema and not a Pydantic model. :param default_headers: Default headers to use for the AzureOpenAI client. @@ -209,6 +213,11 @@ def to_dict(self) -> dict[str, Any]: azure_ad_token_provider_name = None if self.azure_ad_token_provider: azure_ad_token_provider_name = serialize_callable(self.azure_ad_token_provider) + # If the response format is a Pydantic model, its converted to openai's json schema format + # If its already a json schema, it's left as is + response_format = self.generation_kwargs.get("response_format") + if response_format and issubclass(response_format, BaseModel): + self.generation_kwargs["response_format"] = to_strict_json_schema(response_format) return default_to_dict( self, azure_endpoint=self.azure_endpoint, diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index ca0055e7a7..758c9f2043 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -143,7 +143,7 @@ def test_to_dict_default(self, monkeypatch): }, } - def test_to_dict_with_parameters(self, monkeypatch): + def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): monkeypatch.setenv("ENV_VAR", "test-api-key") component = AzureOpenAIChatGenerator( api_key=Secret.from_env_var("ENV_VAR", strict=False), @@ -152,7 +152,11 @@ def test_to_dict_with_parameters(self, monkeypatch): streaming_callback=print_streaming_chunk, timeout=2.5, max_retries=10, - generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + generation_kwargs={ + "max_tokens": 10, + "some_test_param": "test-params", + "response_format": calendar_event_model, + }, azure_ad_token_provider=default_azure_ad_token_provider, http_client_kwargs={"proxy": "http://localhost:8080"}, ) @@ -169,7 +173,21 @@ def test_to_dict_with_parameters(self, monkeypatch): "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "timeout": 2.5, "max_retries": 10, - "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "generation_kwargs": { + "max_tokens": 10, + "some_test_param": "test-params", + "response_format": { + "properties": { + "event_name": {"title": "Event Name", "type": "string"}, + "event_date": {"title": "Event Date", "type": "string"}, + "event_location": {"title": "Event Location", "type": "string"}, + }, + "required": ["event_name", "event_date", "event_location"], + "title": "CalendarEvent", + "type": "object", + "additionalProperties": False, + }, + }, "tools": None, "tools_strict": False, "default_headers": {}, From 6972d77534baf6d81bb534cd2ae8c0797afa8e25 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Sep 2025 15:07:32 +0200 Subject: [PATCH 17/26] PR comments --- haystack/components/generators/chat/azure.py | 4 ++-- 
haystack/components/generators/chat/openai.py | 6 +++--- test/components/generators/chat/test_azure.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py index d115a72df3..e132f31f03 100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -125,12 +125,12 @@ def __init__( # pylint: disable=too-many-positional-arguments Higher values make the model less likely to repeat the token. - `logit_bias`: Adds a logit bias to specific tokens. The keys of the dictionary are tokens, and the values are the bias to add to that token. - - `response_format`: A JSON schema or Pydantic model that enforces the structure of the model's response. + - `response_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response. If provided, the output will always be validated against this format (unless the model returns a tool call). For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). Note: - - This parameter accepts Pydantic models and JSON schemas for latest models starting with GPT-4o. + - This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o. Older models only support basic version of structured outputs through `{"type": "json_object"}`. For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode). - For structured outputs with streaming, diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 6431abe132..4b017d2c27 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -142,12 +142,12 @@ def __init__( # pylint: disable=too-many-positional-arguments Bigger values mean the model will be less likely to repeat the same token in the text. - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens, and the values are the bias to add to that token. - - `response_format`: A JSON schema or Pydantic model that enforces the structure of the model's response. + - `response_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response. If provided, the output will always be validated against this format (unless the model returns a tool call). For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). Note: - - This parameter accepts Pydantic models and JSON schemas for latest models starting with GPT-4o. + - This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o. Older models only support basic version of structured outputs through `{"type": "json_object"}`. For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode). 
- For structured outputs with streaming, @@ -308,7 +308,6 @@ def run( chat_completion = self.client.chat.completions.create(**api_args) elif openai_endpoint == "parse": chat_completion = self.client.chat.completions.parse(**api_args) - print(chat_completion) if streaming_callback is not None: completions = self._handle_stream_response( @@ -533,6 +532,7 @@ def _convert_chat_completion_to_chat_message( :return: The ChatMessage. """ message: Union[ChatCompletionMessage, ParsedChatCompletionMessage] = choice.message + print(message) text = message.content tool_calls = [] if message.tool_calls: diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 758c9f2043..1b1a795ebd 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -20,6 +20,17 @@ from haystack.utils.azure import default_azure_ad_token_provider +class CalendarEvent(BaseModel): + event_name: str + event_date: str + event_location: str + + +@pytest.fixture +def calendar_event_model(): + return CalendarEvent + + def get_weather(city: str) -> dict[str, Any]: weather_info = { "Berlin": {"weather": "mostly sunny", "temperature": 7, "unit": "celsius"}, From 5d50f0aaadc554dee10d4b31ed29b80f74de7c63 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Mon, 8 Sep 2025 15:08:55 +0200 Subject: [PATCH 18/26] Small fix --- haystack/components/generators/chat/openai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 4b017d2c27..a6bf0bcf9e 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -532,7 +532,6 @@ def _convert_chat_completion_to_chat_message( :return: The ChatMessage. 
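        A sketch of a typical result, using the mocked parsed completion from the tests
        (field values illustrative, not exhaustive):

            ChatMessage.from_assistant(
                '{"event_name": "Team Meeting", "event_date": "2024-03-15", ...}',
                meta={"model": "gpt-4o-mini-2024-07-18", "finish_reason": "stop", "usage": {...}},
            )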
""" message: Union[ChatCompletionMessage, ParsedChatCompletionMessage] = choice.message - print(message) text = message.content tool_calls = [] if message.tool_calls: From 762dca408ab90f681fde80c5417aeb485a508ffa Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 9 Sep 2025 10:19:33 +0200 Subject: [PATCH 19/26] Include message.parsed --- haystack/components/generators/chat/openai.py | 8 ++++++-- test/components/generators/chat/test_openai.py | 1 - 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index a6bf0bcf9e..055a003b00 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -304,9 +304,9 @@ def run( tools_strict=tools_strict, ) openai_endpoint = api_args.pop("openai_endpoint") - if openai_endpoint == "create": + if openai_endpoint == "parse": chat_completion = self.client.chat.completions.create(**api_args) - elif openai_endpoint == "parse": + else: chat_completion = self.client.chat.completions.parse(**api_args) if streaming_callback is not None: @@ -563,6 +563,10 @@ def _convert_chat_completion_to_chat_message( "usage": _serialize_usage(completion.usage), }, ) + # Using pdantic with structured output, openai also returns the message.parsed + # return the parsed message in the meta + if message.parsed: + chat_message.meta["parsed"] = message.parsed return chat_message diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index b2065821d8..59728db56e 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -864,7 +864,6 @@ def test_run_with_response_format_with_json_mode(self): ) response = component.run(chat_messages) - print(response) assert len(response["replies"]) == 1 message: ChatMessage = response["replies"][0] assert message.text From 95707c7145eabd23cad5e3b0bc6e7c79fc5aa08a Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 9 Sep 2025 11:22:11 +0200 Subject: [PATCH 20/26] Fix seriliaztion --- haystack/components/generators/chat/azure.py | 10 +++++- haystack/components/generators/chat/openai.py | 17 +++++++--- test/components/generators/chat/test_azure.py | 23 +++++++++----- .../components/generators/chat/test_openai.py | 31 +++++++++++++------ 4 files changed, 59 insertions(+), 22 deletions(-) diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py index e132f31f03..64ff31fda3 100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -217,7 +217,15 @@ def to_dict(self) -> dict[str, Any]: # If its already a json schema, it's left as is response_format = self.generation_kwargs.get("response_format") if response_format and issubclass(response_format, BaseModel): - self.generation_kwargs["response_format"] = to_strict_json_schema(response_format) + json_schema = { + "type": "json_schema", + "json_schema": { + "name": response_format.__name__, + "strict": True, + "schema": to_strict_json_schema(response_format), + }, + } + self.generation_kwargs["response_format"] = json_schema return default_to_dict( self, azure_endpoint=self.azure_endpoint, diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 055a003b00..d1f19516e3 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -220,7 
+220,15 @@ def to_dict(self) -> dict[str, Any]: # If the response format is a Pydantic model, its converted to openai's json schema format # If its already a json schema, it's left as is if response_format and issubclass(response_format, BaseModel): - self.generation_kwargs["response_format"] = to_strict_json_schema(response_format) + json_schema = { + "type": "json_schema", + "json_schema": { + "name": response_format.__name__, + "strict": True, + "schema": to_strict_json_schema(response_format), + }, + } + self.generation_kwargs["response_format"] = json_schema return default_to_dict( self, @@ -250,6 +258,7 @@ def from_dict(cls, data: dict[str, Any]) -> "OpenAIChatGenerator": deserialize_tools_or_toolset_inplace(data["init_parameters"], key="tools") init_params = data.get("init_parameters", {}) serialized_callback_handler = init_params.get("streaming_callback") + if serialized_callback_handler: data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler) return default_from_dict(cls, data) @@ -305,9 +314,9 @@ def run( ) openai_endpoint = api_args.pop("openai_endpoint") if openai_endpoint == "parse": - chat_completion = self.client.chat.completions.create(**api_args) - else: chat_completion = self.client.chat.completions.parse(**api_args) + else: + chat_completion = self.client.chat.completions.create(**api_args) if streaming_callback is not None: completions = self._handle_stream_response( @@ -565,7 +574,7 @@ def _convert_chat_completion_to_chat_message( ) # Using pdantic with structured output, openai also returns the message.parsed # return the parsed message in the meta - if message.parsed: + if isinstance(message, ParsedChatCompletionMessage) and message.parsed: chat_message.meta["parsed"] = message.parsed return chat_message diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 1b1a795ebd..15e945b73d 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -188,15 +188,22 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): "max_tokens": 10, "some_test_param": "test-params", "response_format": { - "properties": { - "event_name": {"title": "Event Name", "type": "string"}, - "event_date": {"title": "Event Date", "type": "string"}, - "event_location": {"title": "Event Location", "type": "string"}, + "type": "json_schema", + "json_schema": { + "name": "CalendarEvent", + "strict": True, + "schema": { + "properties": { + "event_name": {"title": "Event Name", "type": "string"}, + "event_date": {"title": "Event Date", "type": "string"}, + "event_location": {"title": "Event Location", "type": "string"}, + }, + "required": ["event_name", "event_date", "event_location"], + "title": "CalendarEvent", + "type": "object", + "additionalProperties": False, + }, }, - "required": ["event_name", "event_date", "event_location"], - "title": "CalendarEvent", - "type": "object", - "additionalProperties": False, }, }, "tools": None, diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index 59728db56e..4d35fa3ace 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -304,15 +304,22 @@ def test_to_dict_with_parameters(self, monkeypatch, calendar_event_model): "max_tokens": 10, "some_test_param": "test-params", "response_format": { - "properties": { - "event_name": {"title": "Event Name", "type": "string"}, - 
"event_date": {"title": "Event Date", "type": "string"}, - "event_location": {"title": "Event Location", "type": "string"}, + "type": "json_schema", + "json_schema": { + "name": "CalendarEvent", + "strict": True, + "schema": { + "properties": { + "event_name": {"title": "Event Name", "type": "string"}, + "event_date": {"title": "Event Date", "type": "string"}, + "event_location": {"title": "Event Location", "type": "string"}, + }, + "required": ["event_name", "event_date", "event_location"], + "title": "CalendarEvent", + "type": "object", + "additionalProperties": False, + }, }, - "required": ["event_name", "event_date", "event_location"], - "title": "CalendarEvent", - "type": "object", - "additionalProperties": False, }, }, "tools": [ @@ -481,6 +488,9 @@ def test_run_with_response_format(self, chat_messages, mock_parsed_chat_completi assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion + assert "parsed" in response["replies"][0].meta + parsed_output = response["replies"][0].meta["parsed"] + assert isinstance(parsed_output, CalendarEvent) def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed_chat_completion): component = OpenAIChatGenerator(api_key=Secret.from_token("test-api-key")) @@ -491,6 +501,9 @@ def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion + assert "parsed" in response["replies"][0].meta + parsed_output = response["replies"][0].meta["parsed"] + assert isinstance(parsed_output, CalendarEvent) def test_run_with_wrapped_stream_simulation(self, chat_messages, openai_mock_stream): streaming_callback_called = False @@ -857,7 +870,7 @@ def test_live_run_with_response_format_and_streaming(self, calendar_event_model) def test_run_with_response_format_with_json_mode(self): """Test the basic json mode of structured outputs for older gpt models""" chat_messages = [ - ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.Answer in json format") + ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize. 
Answer in json format") ] component = OpenAIChatGenerator( generation_kwargs={"response_format": {"type": "json_object"}}, model="gpt-3.5-turbo" From c7476d49cdd96e1c7786306147dd7809388a52ce Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 9 Sep 2025 11:24:06 +0200 Subject: [PATCH 21/26] Update the async method --- haystack/components/generators/chat/openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index d1f19516e3..f0aecbf5ec 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -394,10 +394,10 @@ async def run_async( ) openai_endpoint = api_args.pop("openai_endpoint") - if openai_endpoint == "create": - chat_completion = await self.async_client.chat.completions.create(**api_args) - elif openai_endpoint == "parse": + if openai_endpoint == "parse": chat_completion = await self.async_client.chat.completions.parse(**api_args) + else: + chat_completion = await self.async_client.chat.completions.create(**api_args) if streaming_callback is not None: completions = await self._handle_async_stream_response( From 6c8988d93837b70e7d64f9435cfdfb701f0645a3 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 9 Sep 2025 11:35:46 +0200 Subject: [PATCH 22/26] Update release notes --- .../notes/add-openai-structured-outputs-906be78a984a1079.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml index f1cb9d0d66..09a996e0f6 100644 --- a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -4,6 +4,7 @@ features: `OpenAIChatGenerator` and `AzureOpenAIChatGenerator` now support structured outputs using `response_format` parameter that can be passed in `generation_kwargs`. The `response_format` parameter can be a Pydantic model or a JSON schema for non-streaming responses. For streaming responses, the `response_format` must be a JSON schema. + While using Pydantic models for structured outputs, the parsed output from OpenAI is returned in the `meta` field of the response `ChatMessage`. 
Example usage of the `response_format` parameter: ```python from pydantic import BaseModel From 868faf19891c9ce00d85dfab2d0475353fcbe2a4 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 9 Sep 2025 11:57:44 +0200 Subject: [PATCH 23/26] Loosen tests to prevent failure --- test/components/generators/chat/test_azure.py | 2 +- test/components/generators/chat/test_openai.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 15e945b73d..247faa8aa8 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -388,7 +388,7 @@ class CalendarEvent(BaseModel): assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "20th Nobel Peace Prize" in msg["event_name"] + assert "Nobel Peace Prize" in msg["event_name"] assert isinstance(msg["event_date"], str) assert isinstance(msg["event_location"], str) diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index 4d35fa3ace..e067a321c9 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -803,11 +803,13 @@ def test_live_run_with_response_format(self, calendar_event_model): assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "20th Nobel Peace Prize" in msg["event_name"] + assert "Nobel Peace Prize" in msg["event_name"] assert isinstance(msg["event_date"], str) assert isinstance(msg["event_location"], str) assert message.meta["finish_reason"] == "stop" + assert "parsed" in message.meta + assert isinstance(message.meta["parsed"], CalendarEvent) @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), @@ -856,7 +858,7 @@ def test_live_run_with_response_format_and_streaming(self, calendar_event_model) assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "20th Nobel Peace Prize" in msg["event_name"] + assert "Nobel Peace Prize" in msg["event_name"] assert isinstance(msg["event_date"], str) assert isinstance(msg["event_location"], str) From 1931316fd88fdb11080ec71517ff8745516c871d Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 16 Sep 2025 09:41:57 +0200 Subject: [PATCH 24/26] PR comments --- haystack/components/generators/chat/azure.py | 13 ++-- haystack/components/generators/chat/openai.py | 60 ++++++++--------- ...i-structured-outputs-906be78a984a1079.yaml | 4 +- test/components/generators/chat/test_azure.py | 6 +- .../components/generators/chat/test_openai.py | 64 ++++--------------- 5 files changed, 51 insertions(+), 96 deletions(-) diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py index 64ff31fda3..920ac5552a 100644 --- a/haystack/components/generators/chat/azure.py +++ b/haystack/components/generators/chat/azure.py @@ -129,7 +129,7 @@ def __init__( # pylint: disable=too-many-positional-arguments If provided, the output will always be validated against this format (unless the model returns a tool call). For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). - Note: + Notes: - This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o. 
Older models only support basic version of structured outputs through `{"type": "json_object"}`. For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode). @@ -213,9 +213,10 @@ def to_dict(self) -> dict[str, Any]: azure_ad_token_provider_name = None if self.azure_ad_token_provider: azure_ad_token_provider_name = serialize_callable(self.azure_ad_token_provider) - # If the response format is a Pydantic model, its converted to openai's json schema format - # If its already a json schema, it's left as is - response_format = self.generation_kwargs.get("response_format") + # If the response format is a Pydantic model, it's converted to openai's json schema format + # If it's already a json schema, it's left as is + generation_kwargs = self.generation_kwargs.copy() + response_format = generation_kwargs.get("response_format") if response_format and issubclass(response_format, BaseModel): json_schema = { "type": "json_schema", @@ -225,7 +226,7 @@ def to_dict(self) -> dict[str, Any]: "schema": to_strict_json_schema(response_format), }, } - self.generation_kwargs["response_format"] = json_schema + generation_kwargs["response_format"] = json_schema return default_to_dict( self, azure_endpoint=self.azure_endpoint, @@ -233,7 +234,7 @@ def to_dict(self) -> dict[str, Any]: organization=self.organization, api_version=self.api_version, streaming_callback=callback_name, - generation_kwargs=self.generation_kwargs, + generation_kwargs=generation_kwargs, timeout=self.timeout, max_retries=self.max_retries, api_key=self.api_key.to_dict() if self.api_key is not None else None, diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index f0aecbf5ec..7b53368fa2 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -146,7 +146,7 @@ def __init__( # pylint: disable=too-many-positional-arguments If provided, the output will always be validated against this format (unless the model returns a tool call). For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs). - Note: + Notes: - This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o. Older models only support basic version of structured outputs through `{"type": "json_object"}`. For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode). @@ -215,10 +215,11 @@ def to_dict(self) -> dict[str, Any]: The serialized component as a dictionary. 
""" callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None - response_format = self.generation_kwargs.get("response_format") + generation_kwargs = self.generation_kwargs.copy() + response_format = generation_kwargs.get("response_format") - # If the response format is a Pydantic model, its converted to openai's json schema format - # If its already a json schema, it's left as is + # If the response format is a Pydantic model, it's converted to openai's json schema format + # If it's already a json schema, it's left as is if response_format and issubclass(response_format, BaseModel): json_schema = { "type": "json_schema", @@ -228,7 +229,7 @@ def to_dict(self) -> dict[str, Any]: "schema": to_strict_json_schema(response_format), }, } - self.generation_kwargs["response_format"] = json_schema + generation_kwargs["response_format"] = json_schema return default_to_dict( self, @@ -236,7 +237,7 @@ def to_dict(self) -> dict[str, Any]: streaming_callback=callback_name, api_base_url=self.api_base_url, organization=self.organization, - generation_kwargs=self.generation_kwargs, + generation_kwargs=generation_kwargs, api_key=self.api_key.to_dict(), timeout=self.timeout, max_retries=self.max_retries, @@ -313,10 +314,8 @@ def run( tools_strict=tools_strict, ) openai_endpoint = api_args.pop("openai_endpoint") - if openai_endpoint == "parse": - chat_completion = self.client.chat.completions.parse(**api_args) - else: - chat_completion = self.client.chat.completions.create(**api_args) + openai_endpoint_method = getattr(self.client.chat.completions, openai_endpoint) + chat_completion = openai_endpoint_method(**api_args) if streaming_callback is not None: completions = self._handle_stream_response( @@ -430,7 +429,13 @@ def _prepare_api_call( # noqa: PLR0913 ) -> dict[str, Any]: # update generation kwargs by merging with the generation kwargs passed to the run method generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})} - response_format = generation_kwargs.get("response_format") if generation_kwargs else None + + is_streaming = streaming_callback is not None + num_responses = generation_kwargs.pop("n", 1) + + if is_streaming and num_responses > 1: + raise ValueError("Cannot stream multiple responses, please set n=1.") + response_format = generation_kwargs.pop("response_format", None) # adapt ChatMessage(s) to the format expected by the OpenAI API openai_formatted_messages = [message.to_openai_dict_format() for message in messages] @@ -452,39 +457,29 @@ def _prepare_api_call( # noqa: PLR0913 tool_definitions.append({"type": "function", "function": function_spec}) openai_tools = {"tools": tool_definitions} - is_streaming = streaming_callback is not None - num_responses = generation_kwargs.pop("n", 1) + base_args = { + "model": self.model, + "messages": openai_formatted_messages, + "n": num_responses, + **openai_tools, + **generation_kwargs, + } if response_format and not is_streaming: # for structured outputs without streaming, we use openai's parse endpoint # Note: `stream` cannot be passed to chat.completions.parse # we pass a key `openai_endpoint` as a hint to the run method to use the parse endpoint # this key will be removed before the API call is made - return { - "model": self.model, - "messages": openai_formatted_messages, - "n": num_responses, - "response_format": response_format, - "openai_endpoint": "parse", - **openai_tools, - **generation_kwargs, - } - - if is_streaming and num_responses > 1: - raise ValueError("Cannot stream multiple 
responses, please set n=1.") + return {**base_args, "response_format": response_format, "openai_endpoint": "parse"} # for structured outputs with streaming, we use openai's create endpoint # we pass a key `openai_endpoint` as a hint to the run method to use the create endpoint # this key will be removed before the API call is made return { - "model": self.model, - "messages": openai_formatted_messages, + **base_args, "stream": streaming_callback is not None, - "n": num_responses, "response_format": response_format, "openai_endpoint": "create", - **openai_tools, - **generation_kwargs, } def _handle_stream_response(self, chat_completion: Stream, callback: SyncStreamingCallbackT) -> list[ChatMessage]: @@ -572,10 +567,7 @@ def _convert_chat_completion_to_chat_message( "usage": _serialize_usage(completion.usage), }, ) - # Using pdantic with structured output, openai also returns the message.parsed - # return the parsed message in the meta - if isinstance(message, ParsedChatCompletionMessage) and message.parsed: - chat_message.meta["parsed"] = message.parsed + return chat_message diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml index 09a996e0f6..15feb53681 100644 --- a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -24,7 +24,9 @@ features: ) response = client.run(messages=[ - ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize winner.") + ChatMessage.from_user("In 2021, American scientist David Julius received the Nobel Prize in + Physiology or Medicine for his groundbreaking discoveries on how the human body + senses temperature and touch.") ]) print(response) diff --git a/test/components/generators/chat/test_azure.py b/test/components/generators/chat/test_azure.py index 247faa8aa8..f4b797c881 100644 --- a/test/components/generators/chat/test_azure.py +++ b/test/components/generators/chat/test_azure.py @@ -380,7 +380,9 @@ class CalendarEvent(BaseModel): event_date: str event_location: str - chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] + chat_messages = [ + ChatMessage.from_user("The marketing summit takes place on October12th at the Hilton Hotel downtown.") + ] component = AzureOpenAIChatGenerator( api_version="2024-08-01-preview", generation_kwargs={"response_format": CalendarEvent} ) @@ -388,7 +390,7 @@ class CalendarEvent(BaseModel): assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "Nobel Peace Prize" in msg["event_name"] + assert "Marketing Summit" in msg["event_name"] assert isinstance(msg["event_date"], str) assert isinstance(msg["event_location"], str) diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index e067a321c9..2f8d02647c 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -488,9 +488,6 @@ def test_run_with_response_format(self, chat_messages, mock_parsed_chat_completi assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion - assert "parsed" in response["replies"][0].meta - parsed_output = response["replies"][0].meta["parsed"] - assert isinstance(parsed_output, 
CalendarEvent) def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed_chat_completion): component = OpenAIChatGenerator(api_key=Secret.from_token("test-api-key")) @@ -501,9 +498,6 @@ def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed assert len(response["replies"]) == 1 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion - assert "parsed" in response["replies"][0].meta - parsed_output = response["replies"][0].meta["parsed"] - assert isinstance(parsed_output, CalendarEvent) def test_run_with_wrapped_stream_simulation(self, chat_messages, openai_mock_stream): streaming_callback_called = False @@ -766,7 +760,9 @@ def test_invalid_tool_call_json(self, tools, caplog): assert message.meta["usage"]["completion_tokens"] == 47 def test_run_with_response_format_and_streaming_pydantic_model(self, calendar_event_model): - chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] + chat_messages = [ + ChatMessage.from_user("The marketing summit takes place on October12th at the Hilton Hotel downtown.") + ] component = OpenAIChatGenerator( api_key=Secret.from_token("test-api-key"), generation_kwargs={"response_format": calendar_event_model}, @@ -797,20 +793,18 @@ def test_live_run(self): ) @pytest.mark.integration def test_live_run_with_response_format(self, calendar_event_model): - chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] + chat_messages = [ + ChatMessage.from_user("The marketing summit takes place on October12th at the Hilton Hotel downtown.") + ] component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model}) results = component.run(chat_messages) assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "Nobel Peace Prize" in msg["event_name"] + assert "Marketing Summit" in msg["event_name"] assert isinstance(msg["event_date"], str) assert isinstance(msg["event_location"], str) - assert message.meta["finish_reason"] == "stop" - assert "parsed" in message.meta - assert isinstance(message.meta["parsed"], CalendarEvent) - @pytest.mark.skipif( not os.environ.get("OPENAI_API_KEY", None), reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", @@ -852,56 +846,20 @@ def test_live_run_with_response_format_json_schema(self): ) @pytest.mark.integration def test_live_run_with_response_format_and_streaming(self, calendar_event_model): - chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")] + chat_messages = [ + ChatMessage.from_user("The marketing summit takes place on October12th at the Hilton Hotel downtown.") + ] component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model}) results = component.run(chat_messages) assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] msg = json.loads(message.text) - assert "Nobel Peace Prize" in msg["event_name"] + assert "Marketing Summit" in msg["event_name"] assert isinstance(msg["event_date"], str) assert isinstance(msg["event_location"], str) assert message.meta["finish_reason"] == "stop" - @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", - ) - @pytest.mark.integration - 
def test_run_with_response_format_with_json_mode(self): - """Test the basic json mode of structured outputs for older gpt models""" - chat_messages = [ - ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize. Answer in json format") - ] - component = OpenAIChatGenerator( - generation_kwargs={"response_format": {"type": "json_object"}}, model="gpt-3.5-turbo" - ) - - response = component.run(chat_messages) - assert len(response["replies"]) == 1 - message: ChatMessage = response["replies"][0] - assert message.text - assert message.meta["finish_reason"] == "stop" - assert message.meta["usage"]["prompt_tokens"] > 0 - - @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", - ) - @pytest.mark.integration - def test_run_with_response_format_and_unsupported_model(self, calendar_event_model): - """Test pydantic model in response format with an unsupported model""" - chat_messages = [ - ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.Answer in json format") - ] - component = OpenAIChatGenerator( - generation_kwargs={"response_format": calendar_event_model}, model="gpt-3.5-turbo" - ) - # for older models, this should raise an OpenAIError - with pytest.raises(OpenAIError): - component.run(chat_messages) - def test_run_with_wrong_model(self): mock_client = MagicMock() mock_client.chat.completions.create.side_effect = OpenAIError("Invalid model name") From 44b869b947564531bc63ff1b2269b6f1be8f196c Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 16 Sep 2025 10:27:34 +0200 Subject: [PATCH 25/26] Fix release notes --- haystack/components/generators/chat/openai.py | 6 ++---- ...dd-openai-structured-outputs-906be78a984a1079.yaml | 11 ++++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 7b53368fa2..e4acf72677 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -393,10 +393,8 @@ async def run_async( ) openai_endpoint = api_args.pop("openai_endpoint") - if openai_endpoint == "parse": - chat_completion = await self.async_client.chat.completions.parse(**api_args) - else: - chat_completion = await self.async_client.chat.completions.create(**api_args) + openai_endpoint_method = getattr(self.async_client.chat.completions, openai_endpoint) + chat_completion = openai_endpoint_method(**api_args) if streaming_callback is not None: completions = await self._handle_async_stream_response( diff --git a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml index 15feb53681..7e80656d10 100644 --- a/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml +++ b/releasenotes/notes/add-openai-structured-outputs-906be78a984a1079.yaml @@ -4,7 +4,6 @@ features: `OpenAIChatGenerator` and `AzureOpenAIChatGenerator` now support structured outputs using `response_format` parameter that can be passed in `generation_kwargs`. The `response_format` parameter can be a Pydantic model or a JSON schema for non-streaming responses. For streaming responses, the `response_format` must be a JSON schema. - While using Pydantic models for structured outputs, the parsed output from OpenAI is returned in the `meta` field of the response `ChatMessage`. 
Example usage of the `response_format` parameter: ```python from pydantic import BaseModel @@ -24,10 +23,12 @@ features: ) response = client.run(messages=[ - ChatMessage.from_user("In 2021, American scientist David Julius received the Nobel Prize in - Physiology or Medicine for his groundbreaking discoveries on how the human body - senses temperature and touch.") + ChatMessage.from_user("In 2021, American scientist David Julius received the Nobel Prize in" + " Physiology or Medicine for his groundbreaking discoveries on how the human body" + " senses temperature and touch.") ]) - print(response) + print(response["replies"][0].text) + + >>> {"recipient_name":"David Julius","award_year":2021,"category":"Physiology or Medicine","achievement_description":"David Julius was awarded for his transformative findings regarding the molecular mechanisms underlying the human body's sense of temperature and touch. Through innovative experiments, he identified specific receptors responsible for detecting heat and mechanical stimuli, ranging from gentle touch to pain-inducing pressure.","nationality":"American"} ``` From 065262de06ca9eecbe9cf5232f404f71358ef109 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 16 Sep 2025 10:33:55 +0200 Subject: [PATCH 26/26] Fix error --- haystack/components/generators/chat/openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index e4acf72677..53754d3fb9 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -394,7 +394,7 @@ async def run_async( openai_endpoint = api_args.pop("openai_endpoint") openai_endpoint_method = getattr(self.async_client.chat.completions, openai_endpoint) - chat_completion = openai_endpoint_method(**api_args) + chat_completion = await openai_endpoint_method(**api_args) if streaming_callback is not None: completions = await self._handle_async_stream_response(