Merged
PR comments
Amnah199 committed Sep 16, 2025
commit 1931316fd88fdb11080ec71517ff8745516c871d
13 changes: 7 additions & 6 deletions haystack/components/generators/chat/azure.py
@@ -129,7 +129,7 @@ def __init__( # pylint: disable=too-many-positional-arguments
If provided, the output will always be validated against this
format (unless the model returns a tool call).
For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
Note:
Notes:
- This parameter accepts Pydantic models and JSON schemas for the latest models, starting from GPT-4o.
Older models only support a basic version of structured outputs through `{"type": "json_object"}`.
For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
@@ -213,9 +213,10 @@ def to_dict(self) -> dict[str, Any]:
azure_ad_token_provider_name = None
if self.azure_ad_token_provider:
azure_ad_token_provider_name = serialize_callable(self.azure_ad_token_provider)
# If the response format is a Pydantic model, its converted to openai's json schema format
# If its already a json schema, it's left as is
response_format = self.generation_kwargs.get("response_format")
# If the response format is a Pydantic model, it's converted to openai's json schema format
# If it's already a json schema, it's left as is
generation_kwargs = self.generation_kwargs.copy()
response_format = generation_kwargs.get("response_format")
if response_format and issubclass(response_format, BaseModel):
json_schema = {
"type": "json_schema",
@@ -225,15 +226,15 @@
"schema": to_strict_json_schema(response_format),
},
}
self.generation_kwargs["response_format"] = json_schema
generation_kwargs["response_format"] = json_schema
return default_to_dict(
self,
azure_endpoint=self.azure_endpoint,
azure_deployment=self.azure_deployment,
organization=self.organization,
api_version=self.api_version,
streaming_callback=callback_name,
generation_kwargs=self.generation_kwargs,
generation_kwargs=generation_kwargs,
timeout=self.timeout,
max_retries=self.max_retries,
api_key=self.api_key.to_dict() if self.api_key is not None else None,
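The same copy-then-convert pattern appears in the OpenAIChatGenerator diff below. A minimal sketch of the behavior being fixed, assuming the openai SDK's `to_strict_json_schema` helper (import path is an assumption) and OpenAI's documented `json_schema` payload shape; the `CalendarEvent` model is hypothetical, for illustration only:

```python
from openai.lib._pydantic import to_strict_json_schema  # assumed openai SDK helper path
from pydantic import BaseModel


class CalendarEvent(BaseModel):  # hypothetical schema, for illustration only
    event_name: str
    event_date: str


generation_kwargs = {"response_format": CalendarEvent, "temperature": 0.7}

# Serialize against a copy so the live component keeps the Pydantic class.
# Before this fix, the converted dict was written back into
# self.generation_kwargs, so a second to_dict() call saw a dict instead of
# the model class.
serializable_kwargs = generation_kwargs.copy()
response_format = serializable_kwargs.get("response_format")
# The isinstance check guards issubclass when a plain JSON schema dict is passed.
if isinstance(response_format, type) and issubclass(response_format, BaseModel):
    serializable_kwargs["response_format"] = {
        "type": "json_schema",
        "json_schema": {
            "name": response_format.__name__,
            "strict": True,
            "schema": to_strict_json_schema(response_format),
        },
    }

assert generation_kwargs["response_format"] is CalendarEvent  # original untouched
```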
60 changes: 26 additions & 34 deletions haystack/components/generators/chat/openai.py
@@ -146,7 +146,7 @@ def __init__( # pylint: disable=too-many-positional-arguments
If provided, the output will always be validated against this
format (unless the model returns a tool call).
For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
Note:
Notes:
- This parameter accepts Pydantic models and JSON schemas for the latest models, starting from GPT-4o.
Older models only support a basic version of structured outputs through `{"type": "json_object"}`.
For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
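A hedged usage sketch of the `response_format` parameter this docstring (and the Azure one above) describes; the import paths follow Haystack's public API, `CalendarEvent` is a hypothetical schema, and an `OPENAI_API_KEY` environment variable is assumed:

```python
from pydantic import BaseModel

from haystack.components.generators.chat import OpenAIChatGenerator


class CalendarEvent(BaseModel):  # hypothetical schema, for illustration only
    event_name: str
    event_date: str
    event_location: str


# Latest models (GPT-4o and later): pass a Pydantic model (or a JSON schema
# dict) and the reply text is validated against it.
generator = OpenAIChatGenerator(generation_kwargs={"response_format": CalendarEvent})

# Older models: only the basic JSON mode is available.
legacy_generator = OpenAIChatGenerator(
    model="gpt-3.5-turbo",
    generation_kwargs={"response_format": {"type": "json_object"}},
)
```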
@@ -215,10 +215,11 @@ def to_dict(self) -> dict[str, Any]:
The serialized component as a dictionary.
"""
callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
response_format = self.generation_kwargs.get("response_format")
generation_kwargs = self.generation_kwargs.copy()
response_format = generation_kwargs.get("response_format")

# If the response format is a Pydantic model, its converted to openai's json schema format
# If its already a json schema, it's left as is
# If the response format is a Pydantic model, it's converted to openai's json schema format
# If it's already a json schema, it's left as is
if response_format and issubclass(response_format, BaseModel):
json_schema = {
"type": "json_schema",
@@ -228,15 +229,15 @@
"schema": to_strict_json_schema(response_format),
},
}
self.generation_kwargs["response_format"] = json_schema
generation_kwargs["response_format"] = json_schema

return default_to_dict(
self,
model=self.model,
streaming_callback=callback_name,
api_base_url=self.api_base_url,
organization=self.organization,
generation_kwargs=self.generation_kwargs,
generation_kwargs=generation_kwargs,
api_key=self.api_key.to_dict(),
timeout=self.timeout,
max_retries=self.max_retries,
@@ -313,10 +314,8 @@ def run(
tools_strict=tools_strict,
)
openai_endpoint = api_args.pop("openai_endpoint")
if openai_endpoint == "parse":
chat_completion = self.client.chat.completions.parse(**api_args)
else:
chat_completion = self.client.chat.completions.create(**api_args)
openai_endpoint_method = getattr(self.client.chat.completions, openai_endpoint)
chat_completion = openai_endpoint_method(**api_args)

if streaming_callback is not None:
completions = self._handle_stream_response(
@@ -430,7 +429,13 @@ def _prepare_api_call( # noqa: PLR0913
) -> dict[str, Any]:
# update generation kwargs by merging with the generation kwargs passed to the run method
generation_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
response_format = generation_kwargs.get("response_format") if generation_kwargs else None

is_streaming = streaming_callback is not None
num_responses = generation_kwargs.pop("n", 1)

if is_streaming and num_responses > 1:
raise ValueError("Cannot stream multiple responses, please set n=1.")
response_format = generation_kwargs.pop("response_format", None)

# adapt ChatMessage(s) to the format expected by the OpenAI API
openai_formatted_messages = [message.to_openai_dict_format() for message in messages]
@@ -452,39 +457,29 @@
tool_definitions.append({"type": "function", "function": function_spec})
openai_tools = {"tools": tool_definitions}

is_streaming = streaming_callback is not None
num_responses = generation_kwargs.pop("n", 1)
base_args = {
"model": self.model,
"messages": openai_formatted_messages,
"n": num_responses,
**openai_tools,
**generation_kwargs,
}

if response_format and not is_streaming:
# for structured outputs without streaming, we use openai's parse endpoint
# Note: `stream` cannot be passed to chat.completions.parse
# we pass a key `openai_endpoint` as a hint to the run method to use the parse endpoint
# this key will be removed before the API call is made
return {
"model": self.model,
"messages": openai_formatted_messages,
"n": num_responses,
"response_format": response_format,
"openai_endpoint": "parse",
**openai_tools,
**generation_kwargs,
}

if is_streaming and num_responses > 1:
raise ValueError("Cannot stream multiple responses, please set n=1.")
return {**base_args, "response_format": response_format, "openai_endpoint": "parse"}

# for streaming requests (and any request without structured outputs), we use openai's create endpoint
# we pass a key `openai_endpoint` as a hint to the run method to use the create endpoint
# this key will be removed before the API call is made
return {
"model": self.model,
"messages": openai_formatted_messages,
**base_args,
"stream": streaming_callback is not None,
"n": num_responses,
"response_format": response_format,
"openai_endpoint": "create",
**openai_tools,
**generation_kwargs,
}
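A condensed sketch of how the `openai_endpoint` hint prepared above is consumed in `run` (the `getattr` dispatch shown earlier); the client construction is illustrative and assumes `OPENAI_API_KEY` is set:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment


def call_chat_endpoint(api_args: dict):
    # "parse" for non-streaming structured outputs, "create" otherwise;
    # the hint key must be popped because the SDK does not accept it.
    endpoint_name = api_args.pop("openai_endpoint")
    endpoint = getattr(client.chat.completions, endpoint_name)  # .parse or .create
    return endpoint(**api_args)
```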

def _handle_stream_response(self, chat_completion: Stream, callback: SyncStreamingCallbackT) -> list[ChatMessage]:
@@ -572,10 +567,7 @@ def _convert_chat_completion_to_chat_message(
"usage": _serialize_usage(completion.usage),
},
)
# Using pydantic with structured output, openai also returns the message.parsed
# return the parsed message in the meta
if isinstance(message, ParsedChatCompletionMessage) and message.parsed:
chat_message.meta["parsed"] = message.parsed

return chat_message


@@ -24,7 +24,9 @@ features:
)

response = client.run(messages=[
ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize winner.")
ChatMessage.from_user("In 2021, American scientist David Julius received the Nobel Prize in
Physiology or Medicine for his groundbreaking discoveries on how the human body
senses temperature and touch.")
])
print(response)

6 changes: 4 additions & 2 deletions test/components/generators/chat/test_azure.py
@@ -380,15 +380,17 @@ class CalendarEvent(BaseModel):
event_date: str
event_location: str

chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = AzureOpenAIChatGenerator(
api_version="2024-08-01-preview", generation_kwargs={"response_format": CalendarEvent}
)
results = component.run(chat_messages)
assert len(results["replies"]) == 1
message: ChatMessage = results["replies"][0]
msg = json.loads(message.text)
assert "Nobel Peace Prize" in msg["event_name"]
assert "Marketing Summit" in msg["event_name"]
assert isinstance(msg["event_date"], str)
assert isinstance(msg["event_location"], str)

64 changes: 11 additions & 53 deletions test/components/generators/chat/test_openai.py
@@ -488,9 +488,6 @@ def test_run_with_response_format(self, chat_messages, mock_parsed_chat_completion):
assert len(response["replies"]) == 1
assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion
assert "parsed" in response["replies"][0].meta
parsed_output = response["replies"][0].meta["parsed"]
assert isinstance(parsed_output, CalendarEvent)

def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed_chat_completion):
component = OpenAIChatGenerator(api_key=Secret.from_token("test-api-key"))
@@ -501,9 +498,6 @@ def test_run_with_response_format_in_run_method(self, chat_messages, mock_parsed_chat_completion):
assert len(response["replies"]) == 1
assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
assert "Team Meeting" in response["replies"][0].text # see mock_parsed_chat_completion
assert "parsed" in response["replies"][0].meta
parsed_output = response["replies"][0].meta["parsed"]
assert isinstance(parsed_output, CalendarEvent)

def test_run_with_wrapped_stream_simulation(self, chat_messages, openai_mock_stream):
streaming_callback_called = False
@@ -766,7 +760,9 @@ def test_invalid_tool_call_json(self, tools, caplog):
assert message.meta["usage"]["completion_tokens"] == 47

def test_run_with_response_format_and_streaming_pydantic_model(self, calendar_event_model):
chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = OpenAIChatGenerator(
api_key=Secret.from_token("test-api-key"),
generation_kwargs={"response_format": calendar_event_model},
@@ -797,20 +793,18 @@ def test_live_run(self):
)
@pytest.mark.integration
def test_live_run_with_response_format(self, calendar_event_model):
chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model})
results = component.run(chat_messages)
assert len(results["replies"]) == 1
message: ChatMessage = results["replies"][0]
msg = json.loads(message.text)
assert "Nobel Peace Prize" in msg["event_name"]
assert "Marketing Summit" in msg["event_name"]
assert isinstance(msg["event_date"], str)
assert isinstance(msg["event_location"], str)

assert message.meta["finish_reason"] == "stop"
assert "parsed" in message.meta
assert isinstance(message.meta["parsed"], CalendarEvent)

@pytest.mark.skipif(
not os.environ.get("OPENAI_API_KEY", None),
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
@@ -852,56 +846,20 @@ def test_live_run_with_response_format_json_schema(self):
)
@pytest.mark.integration
def test_live_run_with_response_format_and_streaming(self, calendar_event_model):
chat_messages = [ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize.")]
chat_messages = [
ChatMessage.from_user("The marketing summit takes place on October 12th at the Hilton Hotel downtown.")
]
component = OpenAIChatGenerator(generation_kwargs={"response_format": calendar_event_model})
results = component.run(chat_messages)
assert len(results["replies"]) == 1
message: ChatMessage = results["replies"][0]
msg = json.loads(message.text)
assert "Nobel Peace Prize" in msg["event_name"]
assert "Marketing Summit" in msg["event_name"]
assert isinstance(msg["event_date"], str)
assert isinstance(msg["event_location"], str)

assert message.meta["finish_reason"] == "stop"

@pytest.mark.skipif(
not os.environ.get("OPENAI_API_KEY", None),
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
@pytest.mark.integration
def test_run_with_response_format_with_json_mode(self):
"""Test the basic json mode of structured outputs for older gpt models"""
chat_messages = [
ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize. Answer in json format")
]
component = OpenAIChatGenerator(
generation_kwargs={"response_format": {"type": "json_object"}}, model="gpt-3.5-turbo"
)

response = component.run(chat_messages)
assert len(response["replies"]) == 1
message: ChatMessage = response["replies"][0]
assert message.text
assert message.meta["finish_reason"] == "stop"
assert message.meta["usage"]["prompt_tokens"] > 0

@pytest.mark.skipif(
not os.environ.get("OPENAI_API_KEY", None),
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
@pytest.mark.integration
def test_run_with_response_format_and_unsupported_model(self, calendar_event_model):
"""Test pydantic model in response format with an unsupported model"""
chat_messages = [
ChatMessage.from_user("Give me information about the 20th Nobel Peace Prize. Answer in json format")
]
component = OpenAIChatGenerator(
generation_kwargs={"response_format": calendar_event_model}, model="gpt-3.5-turbo"
)
# for older models, this should raise an OpenAIError
with pytest.raises(OpenAIError):
component.run(chat_messages)

def test_run_with_wrong_model(self):
mock_client = MagicMock()
mock_client.chat.completions.create.side_effect = OpenAIError("Invalid model name")