Merged
PR comments
Amnah199 committed Sep 16, 2025
commit 1931316fd88fdb11080ec71517ff8745516c871d
13 changes: 7 additions & 6 deletions haystack/components/generators/chat/azure.py
@@ -129,7 +129,7 @@ def __init__( # pylint: disable=too-many-positional-arguments
If provided, the output will always be validated against this
format (unless the model returns a tool call).
For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
Note:
Notes:
- This parameter accepts Pydantic models and JSON schemas for the latest models, starting from GPT-4o.
Older models only support a basic version of structured outputs through `{"type": "json_object"}`.
For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
@@ -213,9 +213,10 @@ def to_dict(self) -> dict[str, Any]:
azure_ad_token_provider_name = None
if self.azure_ad_token_provider:
azure_ad_token_provider_name = serialize_callable(self.azure_ad_token_provider)
# If the response format is a Pydantic model, its converted to openai's json schema format
# If its already a json schema, it's left as is
response_format = self.generation_kwargs.get("response_format")
# If the response format is a Pydantic model, it's converted to openai's json schema format
# If it's already a json schema, it's left as is
generation_kwargs = self.generation_kwargs.copy()
response_format = generation_kwargs.get("response_format")
if response_format and issubclass(response_format, BaseModel):
json_schema = {
"type": "json_schema",
@@ -225,15 +226,15 @@
"schema": to_strict_json_schema(response_format),
},
}
self.generation_kwargs["response_format"] = json_schema
generation_kwargs["response_format"] = json_schema
return default_to_dict(
self,
azure_endpoint=self.azure_endpoint,
azure_deployment=self.azure_deployment,
organization=self.organization,
api_version=self.api_version,
streaming_callback=callback_name,
generation_kwargs=self.generation_kwargs,
generation_kwargs=generation_kwargs,
timeout=self.timeout,
max_retries=self.max_retries,
api_key=self.api_key.to_dict() if self.api_key is not None else None,
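The same copy-then-convert pattern appears in the OpenAIChatGenerator diff below. A minimal sketch of the behavior being fixed, assuming the openai SDK's `to_strict_json_schema` helper (import path is an assumption) and OpenAI's documented `json_schema` payload shape; the `CalendarEvent` model is hypothetical, for illustration only:

```python
from openai.lib._pydantic import to_strict_json_schema  # assumed openai SDK helper path
from pydantic import BaseModel


class CalendarEvent(BaseModel):  # hypothetical schema, for illustration only
    event_name: str
    event_date: str


generation_kwargs = {"response_format": CalendarEvent, "temperature": 0.7}

# Serialize against a copy so the live component keeps the Pydantic class.
# Before this fix, the converted dict was written back into
# self.generation_kwargs, so a second to_dict() call saw a dict instead of
# the model class.
serializable_kwargs = generation_kwargs.copy()
response_format = serializable_kwargs.get("response_format")
# The isinstance check guards issubclass when a plain JSON schema dict is passed.
if isinstance(response_format, type) and issubclass(response_format, BaseModel):
    serializable_kwargs["response_format"] = {
        "type": "json_schema",
        "json_schema": {
            "name": response_format.__name__,
            "strict": True,
            "schema": to_strict_json_schema(response_format),
        },
    }

assert generation_kwargs["response_format"] is CalendarEvent  # original untouched
```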
60 changes: 26 additions & 34 deletions haystack/components/generators/chat/openai.py
@@ -146,7 +146,7 @@ def __init__( # pylint: disable=too-many-positional-arguments
If provided, the output will always be validated against this
format (unless the model returns a tool call).
For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
Note:
Notes:
- This parameter accepts Pydantic models and JSON schemas for the latest models, starting from GPT-4o.
Older models only support a basic version of structured outputs through `{"type": "json_object"}`.
For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
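A hedged usage sketch of the `response_format` parameter this docstring (and the Azure one above) describes; the import paths follow Haystack's public API, `CalendarEvent` is a hypothetical schema, and an `OPENAI_API_KEY` environment variable is assumed:

```python
from pydantic import BaseModel

from haystack.components.generators.chat import OpenAIChatGenerator


class CalendarEvent(BaseModel):  # hypothetical schema, for illustration only
    event_name: str
    event_date: str
    event_location: str


# Latest models (GPT-4o and later): pass a Pydantic model (or a JSON schema
# dict) and the reply text is validated against it.
generator = OpenAIChatGenerator(generation_kwargs={"response_format": CalendarEvent})

# Older models: only the basic JSON mode is available.
legacy_generator = OpenAIChatGenerator(
    model="gpt-3.5-turbo",
    generation_kwargs={"response_format": {"type": "json_object"}},
)
```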
@@ -215,10 +215,11 @@ def to_dict(self) -> dict[str, Any]:
The serialized component as a dictionary.
"""
callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
response_format = self.generation_kwargs.get("response_format")
generation_kwargs = self.generation_kwargs.copy()
response_format = generation_kwargs.get("response_format")

# If the response format is a Pydantic model, its converted to openai's json schema format
# If its already a json schema, it's left as is
# If the response format is a Pydantic model, it's converted to openai's json schema format
# If it's already a json schema, it's left as is
if response_format and issubclass(response_format, BaseModel):
json_schema = {
"type": "json_schema",
@@ -228,15 +229,15 @@
"schema": to_strict_json_schema(response_format),
},
}
self.generation_kwargs["response_format"] = json_schema
generation_kwargs["response_format"] = json_schema

return default_to_dict(
self,
model=self.model,
streaming_callback=callback_name,
api_base_url=self.api_base_url,
organization=self.organization,
generation_kwargs=self.generation_kwargs,
generation_kwargs=generation_kwargs,
api_key=self.api_key.to_dict(),
timeout=self.timeout,
max_retries=self.max_retries,
@@ -313,10 +314,8 @@ def run(
tools_strict=tools_strict,
)
openai_endpoint = api_args.pop("openai_endpoint")
if openai_endpoint == "parse":
chat_completion = self.client.chat.completions.parse(**api_args)
else:
chat_completion = self.client.chat.completions.create(**api_args)
openai_endpoint_method = getattr(self.client.chat.completions, openai_endpoint)
chat_completion = openai_endpoint_method(**api_args)

if streaming_callback is not None:
completions = self._handle_stream_response(
@@ -430,7 +429,13 @@ def _prepare_api_call( # noqa: PLR0913
) -> dict[str, Any]:
# update generation kwargs by merging with the generation kwargs passed to the run method
generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
response_format = generation_kwargs.get("response_format") if generation_kwargs else None

is_streaming = streaming_callback is not None
num_responses = generation_kwargs.pop("n", 1)

if is_streaming and num_responses > 1:
raise ValueError("Cannot stream multiple responses, please set n=1.")
response_format = generation_kwargs.pop("response_format", None)

# adapt ChatMessage(s) to the format expected by the OpenAI API
openai_formatted_messages = [message.to_openai_dict_format() for message in messages]
@@ -452,39 +457,29 @@
tool_definitions.append({"type": "function", "function": function_spec})
openai_tools = {"tools": tool_definitions}

is_streaming = streaming_callback is not None
num_responses = generation_kwargs.pop("n", 1)
base_args = {
"model": self.model,
"messages": openai_formatted_messages,
"n": num_responses,
**openai_tools,
**generation_kwargs,
}

if response_format and not is_streaming:
# for structured outputs without streaming, we use openai's parse endpoint
# Note: `stream` cannot be passed to chat.completions.parse
# we pass a key `openai_endpoint` as a hint to the run method to use the parse endpoint
# this key will be removed before the API call is made
return {
"model": self.model,
"messages": openai_formatted_messages,
"n": num_responses,
"response_format": response_format,
"openai_endpoint": "parse",
**openai_tools,
**generation_kwargs,
}

if is_streaming and num_responses > 1:
raise ValueError("Cannot stream multiple responses, please set n=1.")
return {**base_args, "response_format": response_format, "openai_endpoint": "parse"}

# for streaming requests (and any request without structured outputs), we use openai's create endpoint
# we pass a key `openai_endpoint` as a hint to the run method to use the create endpoint
# this key will be removed before the API call is made
return {
"model": self.model,
"messages": openai_formatted_messages,
**base_args,
"stream": streaming_callback is not None,
"n": num_responses,
"response_format": response_format,
"openai_endpoint": "create",
**openai_tools,
**generation_kwargs,
}
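A condensed sketch of how the `openai_endpoint` hint prepared above is consumed in `run` (the `getattr` dispatch shown earlier); the client construction is illustrative and assumes `OPENAI_API_KEY` is set:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment


def call_chat_endpoint(api_args: dict):
    # "parse" for non-streaming structured outputs, "create" otherwise;
    # the hint key must be popped because the SDK does not accept it.
    endpoint_name = api_args.pop("openai_endpoint")
    endpoint = getattr(client.chat.completions, endpoint_name)  # .parse or .create
    return endpoint(**api_args)
```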

def _handle_stream_response(self, chat_completion: Stream, callback: SyncStreamingCallbackT) -> list[ChatMessage]:
@@ -572,10 +567,7 @@ def _convert_chat_completion_to_chat_message(
"usage": _serialize_usage(completion.usage),
},
)
# Using pydantic with structured output, openai also returns the message.parsed
# return the parsed message in the meta
if isinstance(message, ParsedChatCompletionMessage) and message.parsed:
chat_message.meta["parsed"] = message.parsed

return chat_message


@@ -24,7 +24,9 @@ features:
)

response = client.run(messages=[
ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize winner.")
ChatMessage.from_user("In 2021, American scientist David Julius received the Nobel Prize in
Physiology or Medicine for his groundbreaking discoveries on how the human body
senses temperature and touch.")
])
print(response)

6 changes: 4 additions & 2 deletions test/components/generators/chat/test_azure.py
@@ -380,15 +380,17 @@ class CalendarEvent(BaseModel):
event_date: str
event_location: str

chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = AzureOpenAIChatGenerator(
api_version="2024-08-01-preview", generation_kwargs={"response_format": CalendarEvent}
)
results = component.run(chat_messages)
assert len(results["replies"]) == 1
message: ChatMessage = results["replies"][0]
msg = json.loads(message.text)
assert "Nobel Peace Prize" in msg["event_name"]
assert "Marketing Summit" in msg["event_name"]
assert isinstance(msg["event_date"], str)
assert isinstance(msg["event_location"], str)

64 changes: 11 additions & 53 deletions test/components/generators/chat/test_openai.py
@@ -488,9 +488,6 @@ def test_run_with_response_format(self, chat_messages, mock_parsed_chat_completion):
assert len(response["replies"]) == 1
assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion
assert "parsed" in response["replies"][0].meta
parsed_output = response["replies"][0].meta["parsed"]
assert isinstance(parsed_output, CalendarEvent)

def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed_chat_completion):
component = OpenAIChatGenerator(api_key=Secret.from_token("test-api-key"))
@@ -501,9 +498,6 @@ def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed_chat_completion):
assert len(response["replies"]) == 1
assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion
assert "parsed" in response["replies"][0].meta
parsed_output = response["replies"][0].meta["parsed"]
assert isinstance(parsed_output, CalendarEvent)

def test_run_with_wrapped_stream_simulation(self, chat_messages, openai_mock_stream):
streaming_callback_called = False
@@ -766,7 +760,9 @@ def test_invalid_tool_call_json(self, tools, caplog):
assert message.meta["usage"]["completion_tokens"] == 47

def test_run_with_response_format_and_streaming_pydantic_model(self, calendar_event_model):
chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = OpenAIChatGenerator(
api_key=Secret.from_token("test-api-key"),
generation_kwargs={"response_format": calendar_event_model},
@@ -797,20 +793,18 @@ def test_live_run(self):
)
@pytest.mark.integration
def test_live_run_with_response_format(self, calendar_event_model):
chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model})
results = component.run(chat_messages)
assert len(results["replies"]) == 1
message: ChatMessage = results["replies"][0]
msg = json.loads(message.text)
assert "Nobel Peace Prize" in msg["event_name"]
assert "Marketing Summit" in msg["event_name"]
assert isinstance(msg["event_date"], str)
assert isinstance(msg["event_location"], str)

assert message.meta["finish_reason"] == "stop"
assert "parsed" in message.meta
assert isinstance(message.meta["parsed"], CalendarEvent)

@pytest.mark.skipif(
not os.environ.get("OPENAI_API_KEY", None),
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
@@ -852,56 +846,20 @@ def test_live_run_with_response_format_json_schema(self):
)
@pytest.mark.integration
def test_live_run_with_response_format_and_streaming(self, calendar_event_model):
chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model})
results = component.run(chat_messages)
assert len(results["replies"]) == 1
message: ChatMessage = results["replies"][0]
msg = json.loads(message.text)
assert "Nobel Peace Prize" in msg["event_name"]
assert "Marketing Summit" in msg["event_name"]
assert isinstance(msg["event_date"], str)
assert isinstance(msg["event_location"], str)

assert message.meta["finish_reason"] == "stop"

@pytest.mark.skipif(
not os.environ.get("OPENAI_API_KEY", None),
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
@pytest.mark.integration
def test_run_with_response_format_with_json_mode(self):
"""Test the basic json mode of structured outputs for older gpt models"""
chat_messages = [
ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize. Answer in json format")
]
component = OpenAIChatGenerator(
generation_kwargs={"response_format": {"type": "json_object"}}, model="gpt-3.5-turbo"
)

response = component.run(chat_messages)
assert len(response["replies"]) == 1
message: ChatMessage = response["replies"][0]
assert message.text
assert message.meta["finish_reason"] == "stop"
assert message.meta["usage"]["prompt_tokens"] > 0

@pytest.mark.skipif(
not os.environ.get("OPENAI_API_KEY", None),
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
@pytest.mark.integration
def test_run_with_response_format_and_unsupported_model(self, calendar_event_model):
"""Test pydantic model in response format with an unsupported model"""
chat_messages = [
ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize. Answer in json format")
]
component = OpenAIChatGenerator(
generation_kwargs={"response_format": calendar_event_model}, model="gpt-3.5-turbo"
)
# for older models, this should raise an OpenAIError
with pytest.raises(OpenAIError):
component.run(chat_messages)

def test_run_with_wrong_model(self):
mock_client = MagicMock()
mock_client.chat.completions.create.side_effect = OpenAIError("Invalid model name")