From 9e02ab4728d5b94b1a4848cb20063d087d38d428 Mon Sep 17 00:00:00 2001 From: Gabriel Bianconi <1275491+GabrielBianconi@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:50:07 -0500 Subject: [PATCH 1/2] Support tensorzero_reasoning_content for OpenAI-compatible endpoint --- .../types/chat_completions.rs | 338 +++++++++++++++++- .../openai_compatible/types/streaming.rs | 202 ++++++++++- 2 files changed, 510 insertions(+), 30 deletions(-) diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs index 4b15e42cb1..065c98c232 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs @@ -34,7 +34,7 @@ use crate::inference::types::extra_headers::UnfilteredInferenceExtraHeaders; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ Arguments, ContentBlockChatOutput, FinishReason, Input, InputMessage, InputMessageContent, - RawText, Role, System, Template, Text, current_timestamp, + RawText, Role, System, Template, Text, Thought, current_timestamp, }; use crate::tool::{DynamicToolParams, ProviderTool, ToolResult}; use crate::variant::JsonMode; @@ -57,6 +57,8 @@ pub struct OpenAICompatibleUserMessage { pub struct OpenAICompatibleAssistantMessage { pub content: Option, pub tool_calls: Option>, + #[serde(default)] + pub tensorzero_reasoning_content: Option>, } #[derive(Clone, Debug, PartialEq, TensorZeroDeserialize)] @@ -170,8 +172,11 @@ pub struct OpenAICompatibleParams { #[derive(Clone, Debug, PartialEq, Serialize)] pub struct OpenAICompatibleResponseMessage { pub content: Option, + #[serde(skip_serializing_if = "is_none_or_empty")] pub tool_calls: Option>, pub role: String, + #[serde(skip_serializing_if = "is_none_or_empty")] + pub tensorzero_reasoning_content: Option>, } #[derive(Clone, Debug, PartialEq, Serialize)] @@ -224,6 +229,28 @@ pub struct OpenAICompatibleResponse { pub tensorzero_raw_response: Option>, } +// Signature dictated by Serde +#[expect(clippy::ref_option)] +fn is_none_or_empty(v: &Option>) -> bool { + v.as_ref().is_none_or(Vec::is_empty) +} + +/// OpenAI-compatible Thought for responses (with index for position in content array) +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct OpenAICompatibleThought { + pub index: usize, + #[serde(flatten)] + pub thought: Thought, +} + +/// OpenAI-compatible Thought for input (with optional index) +#[derive(Clone, Debug, Deserialize, PartialEq)] +pub struct OpenAICompatibleInputThought { + pub index: Option, + #[serde(flatten)] + pub thought: Thought, +} + // ============================================================================ // Content Block Types // ============================================================================ @@ -552,6 +579,31 @@ pub fn openai_messages_to_input( message_content.push(InputMessageContent::ToolCall(tool_call.into())); } } + // Process reasoning content with index-based insertion + if let Some(thoughts) = msg.tensorzero_reasoning_content { + // Collect indexed thoughts (with explicit index) and unindexed thoughts separately + let mut indexed: Vec<(usize, Thought)> = Vec::new(); + let mut unindexed: Vec = Vec::new(); + for input_thought in thoughts { + match input_thought.index { + Some(idx) => indexed.push((idx, input_thought.thought)), + None => unindexed.push(input_thought.thought), + } + } + + // First, insert thoughts with explicit 
indices at their specified positions + // Sort by index to handle insertions correctly + indexed.sort_by_key(|(idx, _)| *idx); + for (idx, thought) in indexed { + let idx = idx.min(message_content.len()); + message_content.insert(idx, InputMessageContent::Thought(thought)); + } + + // Then, prepend all thoughts without an index at position 0 (reverse to maintain order) + for thought in unindexed.into_iter().rev() { + message_content.insert(0, InputMessageContent::Thought(thought)); + } + } messages.push(InputMessage { role: Role::Assistant, content: message_content, @@ -689,7 +741,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse ) -> Self { match inference_response { InferenceResponse::Chat(response) => { - let (content, tool_calls) = process_chat_content(response.content); + let (content, tool_calls, thoughts) = process_chat_content(response.content); let tensorzero_original_response = if include_original_response { response.original_response } else { @@ -708,8 +760,17 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse finish_reason: response.finish_reason.unwrap_or(FinishReason::Stop).into(), message: OpenAICompatibleResponseMessage { content, - tool_calls: Some(tool_calls), + tool_calls: if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }, role: "assistant".to_string(), + tensorzero_reasoning_content: if thoughts.is_empty() { + None + } else { + Some(thoughts) + }, }, }], created: current_timestamp() as u32, @@ -745,6 +806,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse content: response.output.raw, tool_calls: None, role: "assistant".to_string(), + tensorzero_reasoning_content: None, }, }], created: current_timestamp() as u32, @@ -763,14 +825,20 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse } } -/// Takes a vector of ContentBlockOutput and returns a tuple of (Option, Vec). -/// This is useful since the OpenAI format separates text and tool calls in the response fields. +/// Takes a vector of ContentBlockOutput and returns a tuple of +/// (Option, Vec, Vec). +/// This is useful since the OpenAI format separates text, tool calls, and reasoning in the response fields. 
pub fn process_chat_content( content: Vec, -) -> (Option, Vec) { +) -> ( + Option, + Vec, + Vec, +) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); - for block in content { + let mut thoughts = Vec::new(); + for (index, block) in content.into_iter().enumerate() { match block { ContentBlockChatOutput::Text(text) => match content_str { Some(ref mut content) => content.push_str(&text.text), @@ -779,12 +847,8 @@ pub fn process_chat_content( ContentBlockChatOutput::ToolCall(tool_call) => { tool_calls.push(tool_call.into()); } - ContentBlockChatOutput::Thought(_thought) => { - // OpenAI compatible endpoint does not support thought blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'thought' content block when constructing OpenAI-compatible response" - ); + ContentBlockChatOutput::Thought(thought) => { + thoughts.push(OpenAICompatibleThought { index, thought }); } ContentBlockChatOutput::Unknown(_) => { tracing::warn!( @@ -793,7 +857,7 @@ pub fn process_chat_content( } } } - (content_str, tool_calls) + (content_str, tool_calls, thoughts) } #[cfg(test)] @@ -862,6 +926,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -873,6 +938,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -884,6 +950,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::Tool(OpenAICompatibleToolMessage { content: Some(Value::String("Tool result 1".to_string())), @@ -1067,6 +1134,7 @@ mod tests { "city": "Tokyo", }])), tool_calls: None, + tensorzero_reasoning_content: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1100,6 +1168,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1133,6 +1202,7 @@ mod tests { OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: Some(Value::String("Assistant message".to_string())), tool_calls: None, + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::System(OpenAICompatibleSystemMessage { content: Value::String("System message".to_string()), @@ -1495,16 +1565,19 @@ mod tests { text: ", world!".to_string(), }), ]; - let (content_str, tool_calls) = process_chat_content(content); + let (content_str, tool_calls, thoughts) = process_chat_content(content); assert_eq!(content_str, Some("Hello, world!".to_string())); assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls[0].id, "1"); assert_eq!(tool_calls[0].function.name, "test_tool"); assert_eq!(tool_calls[0].function.arguments, "{}"); + assert!(thoughts.is_empty()); + let content: Vec = vec![]; - let (content_str, tool_calls) = process_chat_content(content); + let (content_str, tool_calls, thoughts) = process_chat_content(content); assert_eq!(content_str, None); assert!(tool_calls.is_empty()); + assert!(thoughts.is_empty()); let content = vec![ ContentBlockChatOutput::Text(Text { @@ -1527,7 +1600,7 @@ mod tests { text: " fourth part".to_string(), }), ]; - let (content_str, tool_calls) = process_chat_content(content); + let (content_str, tool_calls, thoughts) = process_chat_content(content); assert_eq!( content_str, Some("First part second part third part 
fourth part".to_string()) @@ -1536,6 +1609,7 @@ mod tests { assert_eq!(tool_calls[0].id, "123"); assert_eq!(tool_calls[0].function.name, "middle_tool"); assert_eq!(tool_calls[0].function.arguments, "{\"key\": \"value\"}"); + assert!(thoughts.is_empty()); } #[test] @@ -1616,4 +1690,234 @@ mod tests { } ); } + + #[test] + fn test_process_chat_content_with_thoughts() { + let content = vec![ + ContentBlockChatOutput::Thought(Thought { + text: Some("Let me think about this...".to_string()), + signature: Some("sig123".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }), + ContentBlockChatOutput::Text(Text { + text: "Hello".to_string(), + }), + ContentBlockChatOutput::Thought(Thought { + text: Some("Another thought".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }), + ]; + let (content_str, tool_calls, thoughts) = process_chat_content(content); + assert_eq!( + content_str, + Some("Hello".to_string()), + "Text content should be extracted" + ); + assert!(tool_calls.is_empty(), "No tool calls expected"); + assert_eq!(thoughts.len(), 2, "Should have two thoughts"); + + // First thought at index 0 + assert_eq!(thoughts[0].index, 0, "First thought should have index 0"); + assert_eq!( + thoughts[0].thought.text, + Some("Let me think about this...".to_string()) + ); + assert_eq!(thoughts[0].thought.signature, Some("sig123".to_string())); + assert_eq!( + thoughts[0].thought.provider_type, + Some("anthropic".to_string()) + ); + + // Second thought at index 2 + assert_eq!(thoughts[1].index, 2, "Second thought should have index 2"); + assert_eq!( + thoughts[1].thought.text, + Some("Another thought".to_string()) + ); + assert_eq!(thoughts[1].thought.signature, None); + } + + #[test] + fn test_input_reasoning_content_ordering() { + // Test with indexed and unindexed thoughts + let messages = vec![OpenAICompatibleMessage::Assistant( + OpenAICompatibleAssistantMessage { + content: Some(Value::String("Response text".to_string())), + tool_calls: None, + tensorzero_reasoning_content: Some(vec![ + // Unindexed thought - should be prepended + OpenAICompatibleInputThought { + index: None, + thought: Thought { + text: Some("Unindexed thought 1".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + // Indexed thought at position 2 + OpenAICompatibleInputThought { + index: Some(2), + thought: Thought { + text: Some("Indexed thought at 2".to_string()), + signature: Some("sig456".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }, + // Another unindexed thought - should be prepended + OpenAICompatibleInputThought { + index: None, + thought: Thought { + text: Some("Unindexed thought 2".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + // Indexed thought at position 0 + OpenAICompatibleInputThought { + index: Some(0), + thought: Thought { + text: Some("Indexed thought at 0".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + ]), + }, + )]; + let input: Input = openai_messages_to_input(messages).unwrap(); + assert_eq!(input.messages.len(), 1, "Should have one message"); + let content = &input.messages[0].content; + + // Expected order based on the algorithm: + // 1. Start with text content: ["Response text"] + // 2. 
Apply indexed thoughts (sorted by index): + // - Insert at 0: ["Indexed thought at 0", "Response text"] + // - Insert at 2 (capped to len=2): ["Indexed thought at 0", "Response text", "Indexed thought at 2"] + // 3. Prepend unindexed thoughts (reversed to maintain order): + // - Prepend "Unindexed thought 2": ["Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] + // - Prepend "Unindexed thought 1": ["Unindexed thought 1", "Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] + assert_eq!(content.len(), 5, "Should have 5 content blocks"); + + // Verify content order + match &content[0] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Unindexed thought 1".to_string()), + "First should be unindexed thought 1" + ); + } + _ => panic!("Expected Thought at position 0"), + } + match &content[1] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Unindexed thought 2".to_string()), + "Second should be unindexed thought 2" + ); + } + _ => panic!("Expected Thought at position 1"), + } + match &content[2] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Indexed thought at 0".to_string()), + "Third should be indexed thought at 0" + ); + } + _ => panic!("Expected Thought at position 2"), + } + match &content[3] { + InputMessageContent::Text(t) => { + assert_eq!(t.text, "Response text", "Fourth should be the text content"); + } + _ => panic!("Expected Text at position 3"), + } + match &content[4] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Indexed thought at 2".to_string()), + "Fifth should be indexed thought at 2" + ); + assert_eq!( + t.signature, + Some("sig456".to_string()), + "Should preserve signature" + ); + assert_eq!( + t.provider_type, + Some("anthropic".to_string()), + "Should preserve provider_type" + ); + } + _ => panic!("Expected Thought at position 4"), + } + } + + #[test] + fn test_openai_compatible_input_thought_deserialization() { + // Test that serde(flatten) works correctly for input thoughts + let json_with_index = json!({ + "index": 5, + "text": "My thought", + "signature": "sig789", + "provider_type": "anthropic" + }); + let thought: OpenAICompatibleInputThought = + serde_json::from_value(json_with_index).unwrap(); + assert_eq!(thought.index, Some(5)); + assert_eq!(thought.thought.text, Some("My thought".to_string())); + assert_eq!(thought.thought.signature, Some("sig789".to_string())); + assert_eq!(thought.thought.provider_type, Some("anthropic".to_string())); + + // Test without index + let json_without_index = json!({ + "text": "Another thought" + }); + let thought: OpenAICompatibleInputThought = + serde_json::from_value(json_without_index).unwrap(); + assert_eq!(thought.index, None); + assert_eq!(thought.thought.text, Some("Another thought".to_string())); + assert_eq!(thought.thought.signature, None); + } + + #[test] + fn test_openai_compatible_thought_serialization() { + // Test that serde(flatten) works correctly for output thoughts + let thought = OpenAICompatibleThought { + index: 3, + thought: Thought { + text: Some("Thinking...".to_string()), + signature: Some("sig_abc".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }; + let json = serde_json::to_value(&thought).unwrap(); + + // With flatten, all fields should be at the top level + assert_eq!(json["index"], 3); + assert_eq!(json["text"], "Thinking..."); + assert_eq!(json["signature"], "sig_abc"); + 
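        // For reference, a value like the one asserted here would serialize to
        // roughly the following JSON (a sketch; serde_json object key order may vary):
        //   {"index": 3, "text": "Thinking...", "signature": "sig_abc", "provider_type": "anthropic"}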
assert_eq!(json["provider_type"], "anthropic"); + // summary should not be present since it's None and has skip_serializing_if + assert!(json.get("summary").is_none() || json["summary"].is_null()); + } } diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs index 41a599f677..b23fb6afdb 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use tokio_stream::StreamExt; use crate::error::{Error, ErrorDetails}; +use crate::inference::types::streams::ThoughtChunk; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ContentBlockChunk, FinishReason, current_timestamp}; @@ -60,6 +61,14 @@ fn is_none_or_empty(v: &Option>) -> bool { v.as_ref().is_none_or(Vec::is_empty) } +/// OpenAI-compatible ThoughtChunk for streaming responses (with index for position in content array) +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct OpenAICompatibleThoughtChunk { + pub index: usize, + #[serde(flatten)] + pub chunk: ThoughtChunk, +} + #[derive(Clone, Debug, PartialEq, Serialize)] pub struct OpenAICompatibleDelta { #[serde(skip_serializing_if = "Option::is_none")] @@ -68,12 +77,15 @@ pub struct OpenAICompatibleDelta { pub content: Option, #[serde(skip_serializing_if = "is_none_or_empty")] pub tool_calls: Option>, + #[serde(skip_serializing_if = "is_none_or_empty")] + pub tensorzero_reasoning_content: Option>, } #[expect(clippy::too_many_arguments)] pub fn convert_inference_response_chunk_to_openai_compatible( chunk: InferenceResponseChunk, tool_id_to_index: &mut HashMap, + thought_id_to_index: &mut HashMap, response_model_prefix: &str, is_first_chunk: bool, include_usage: bool, @@ -90,7 +102,8 @@ pub fn convert_inference_response_chunk_to_openai_compatible( let response_chunk = match chunk { InferenceResponseChunk::Chat(c) => { - let (content, tool_calls) = process_chat_content_chunk(c.content, tool_id_to_index); + let (content, tool_calls, thoughts) = + process_chat_content_chunk(c.content, tool_id_to_index, thought_id_to_index); let usage = if include_usage { c.usage.map(OpenAICompatibleUsage::from) } else { @@ -123,7 +136,16 @@ pub fn convert_inference_response_chunk_to_openai_compatible( delta: OpenAICompatibleDelta { role: role.clone(), content, - tool_calls: Some(tool_calls), + tool_calls: if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }, + tensorzero_reasoning_content: if thoughts.is_empty() { + None + } else { + Some(thoughts) + }, }, }], created: current_timestamp() as u32, @@ -172,6 +194,7 @@ pub fn convert_inference_response_chunk_to_openai_compatible( role, content: Some(c.raw), tool_calls: None, + tensorzero_reasoning_content: None, }, }], created: current_timestamp() as u32, @@ -194,9 +217,15 @@ pub fn convert_inference_response_chunk_to_openai_compatible( pub fn process_chat_content_chunk( content: Vec, tool_id_to_index: &mut HashMap, -) -> (Option, Vec) { + thought_id_to_index: &mut HashMap, +) -> ( + Option, + Vec, + Vec, +) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); + let mut thoughts = Vec::new(); for block in content { match block { ContentBlockChunk::Text(text) => match content_str { @@ -217,12 +246,13 @@ pub fn process_chat_content_chunk( }, }); } - ContentBlockChunk::Thought(_thought) => { - // OpenAI compatible endpoint does not support thought 
blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'thought' content block chunk when constructing OpenAI-compatible response" - ); + ContentBlockChunk::Thought(thought) => { + let len = thought_id_to_index.len(); + let index = *thought_id_to_index.entry(thought.id.clone()).or_insert(len); + thoughts.push(OpenAICompatibleThoughtChunk { + index, + chunk: thought, + }); } ContentBlockChunk::Unknown(_) => { // OpenAI compatible endpoint does not support unknown blocks @@ -233,7 +263,7 @@ pub fn process_chat_content_chunk( } } } - (content_str, tool_calls) + (content_str, tool_calls, thoughts) } /// Prepares an Event for SSE on the way out of the gateway. @@ -249,6 +279,7 @@ pub fn prepare_serialized_openai_compatible_events( ) -> impl Stream> { async_stream::stream! { let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let mut is_first_chunk = true; while let Some(chunk) = stream.next().await { @@ -261,6 +292,7 @@ pub fn prepare_serialized_openai_compatible_events( let openai_compatible_chunks = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, &response_model_prefix, is_first_chunk, include_usage, @@ -313,9 +345,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -369,9 +403,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -416,9 +452,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -473,9 +511,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -513,9 +553,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -564,9 +606,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage = false @@ -606,9 +650,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage @@ -653,9 +699,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = 
convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage @@ -692,9 +740,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage @@ -734,18 +784,23 @@ mod tests { }), ]; let mut tool_id_to_index = HashMap::new(); - let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index); + let mut thought_id_to_index = HashMap::new(); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); assert_eq!(content_str, Some("Hello, world!".to_string())); assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls[0].id, Some("1".to_string())); assert_eq!(tool_calls[0].index, 0); assert_eq!(tool_calls[0].function.name, "test_tool".to_string()); assert_eq!(tool_calls[0].function.arguments, "{}"); + assert!(thoughts.is_empty()); let content: Vec = vec![]; - let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); assert_eq!(content_str, None); assert!(tool_calls.is_empty()); + assert!(thoughts.is_empty()); let content = vec![ ContentBlockChunk::Text(TextChunk { @@ -776,7 +831,9 @@ mod tests { }), ]; let mut tool_id_to_index = HashMap::new(); - let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index); + let mut thought_id_to_index = HashMap::new(); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); assert_eq!( content_str, Some("First part second part third part fourth part".to_string()) @@ -790,5 +847,124 @@ mod tests { assert_eq!(tool_calls[1].index, 1); assert_eq!(tool_calls[1].function.name, "last_tool".to_string()); assert_eq!(tool_calls[1].function.arguments, "{\"key\": \"value\"}"); + assert!(thoughts.is_empty()); + } + + #[test] + fn test_process_chat_content_chunk_with_thoughts() { + let content = vec![ + ContentBlockChunk::Thought(ThoughtChunk { + id: "thought_1".to_string(), + text: Some("Let me think...".to_string()), + signature: Some("sig123".to_string()), + summary_id: None, + summary_text: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }), + ContentBlockChunk::Text(TextChunk { + id: "text_1".to_string(), + text: "Response text".to_string(), + }), + ContentBlockChunk::Thought(ThoughtChunk { + id: "thought_1".to_string(), // Same ID, new chunk + text: Some(" more thinking".to_string()), + signature: None, + summary_id: None, + summary_text: None, + provider_type: None, + extra_data: None, + }), + ContentBlockChunk::Thought(ThoughtChunk { + id: "thought_2".to_string(), // New thought + text: Some("Different thought".to_string()), + signature: None, + summary_id: None, + summary_text: None, + provider_type: None, + extra_data: None, + }), + ]; + + let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); + + assert_eq!( + content_str, + Some("Response text".to_string()), + "Text content should be extracted" 
+ ); + assert!(tool_calls.is_empty(), "No tool calls expected"); + assert_eq!(thoughts.len(), 3, "Should have three thought chunks"); + + // First thought chunk + assert_eq!( + thoughts[0].index, 0, + "First thought should have index 0 (assigned based on thought_1)" + ); + assert_eq!(thoughts[0].chunk.id, "thought_1"); + assert_eq!(thoughts[0].chunk.text, Some("Let me think...".to_string())); + assert_eq!(thoughts[0].chunk.signature, Some("sig123".to_string())); + assert_eq!( + thoughts[0].chunk.provider_type, + Some("anthropic".to_string()) + ); + + // Second thought chunk (same thought ID, should have same index) + assert_eq!( + thoughts[1].index, 0, + "Second chunk of thought_1 should still have index 0" + ); + assert_eq!(thoughts[1].chunk.id, "thought_1"); + assert_eq!(thoughts[1].chunk.text, Some(" more thinking".to_string())); + + // Third thought chunk (new thought ID, should have new index) + assert_eq!(thoughts[2].index, 1, "New thought_2 should have index 1"); + assert_eq!(thoughts[2].chunk.id, "thought_2"); + assert_eq!( + thoughts[2].chunk.text, + Some("Different thought".to_string()) + ); + + // Verify the hashmap tracked the thought IDs correctly + assert_eq!( + thought_id_to_index.get("thought_1"), + Some(&0), + "thought_1 should be mapped to index 0" + ); + assert_eq!( + thought_id_to_index.get("thought_2"), + Some(&1), + "thought_2 should be mapped to index 1" + ); + } + + #[test] + fn test_openai_compatible_thought_chunk_serialization() { + // Test that serde(flatten) works correctly for streaming thought chunks + let chunk = OpenAICompatibleThoughtChunk { + index: 2, + chunk: ThoughtChunk { + id: "chunk_id".to_string(), + text: Some("Thinking...".to_string()), + signature: Some("sig_chunk".to_string()), + summary_id: Some("summary_1".to_string()), + summary_text: Some("Summary text".to_string()), + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }; + let json = serde_json::to_value(&chunk).unwrap(); + + // With flatten, all fields should be at the top level + assert_eq!(json["index"], 2); + assert_eq!(json["id"], "chunk_id"); + assert_eq!(json["text"], "Thinking..."); + assert_eq!(json["signature"], "sig_chunk"); + assert_eq!(json["summary_id"], "summary_1"); + assert_eq!(json["summary_text"], "Summary text"); + assert_eq!(json["provider_type"], "anthropic"); } } From 48e0fae5c053cd4ff004103927368f51b77b71c6 Mon Sep 17 00:00:00 2001 From: Gabriel Bianconi <1275491+GabrielBianconi@users.noreply.github.com> Date: Fri, 30 Jan 2026 10:20:40 -0500 Subject: [PATCH 2/2] Fix --- .../types/chat_completions.rs | 428 +++++++++++------- .../openai_compatible/types/streaming.rs | 274 ++++++----- 2 files changed, 448 insertions(+), 254 deletions(-) diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs index 065c98c232..145653406f 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs @@ -34,7 +34,7 @@ use crate::inference::types::extra_headers::UnfilteredInferenceExtraHeaders; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ Arguments, ContentBlockChatOutput, FinishReason, Input, InputMessage, InputMessageContent, - RawText, Role, System, Template, Text, Thought, current_timestamp, + RawText, Role, System, Template, Text, Thought, Unknown, current_timestamp, }; use 
crate::tool::{DynamicToolParams, ProviderTool, ToolResult}; use crate::variant::JsonMode; @@ -58,7 +58,7 @@ pub struct OpenAICompatibleAssistantMessage { pub content: Option, pub tool_calls: Option>, #[serde(default)] - pub tensorzero_reasoning_content: Option>, + pub tensorzero_extra_content_experimental: Option>, } #[derive(Clone, Debug, PartialEq, TensorZeroDeserialize)] @@ -176,7 +176,7 @@ pub struct OpenAICompatibleResponseMessage { pub tool_calls: Option>, pub role: String, #[serde(skip_serializing_if = "is_none_or_empty")] - pub tensorzero_reasoning_content: Option>, + pub tensorzero_extra_content_experimental: Option>, } #[derive(Clone, Debug, PartialEq, Serialize)] @@ -235,20 +235,36 @@ fn is_none_or_empty(v: &Option>) -> bool { v.as_ref().is_none_or(Vec::is_empty) } -/// OpenAI-compatible Thought for responses (with index for position in content array) +/// Extra content block for OpenAI-compatible responses (Thought or Unknown) #[derive(Clone, Debug, PartialEq, Serialize)] -pub struct OpenAICompatibleThought { - pub index: usize, - #[serde(flatten)] - pub thought: Thought, +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExtraContentBlock { + Thought { + insert_index: usize, + #[serde(flatten)] + thought: Thought, + }, + Unknown { + insert_index: usize, + #[serde(flatten)] + unknown: Unknown, + }, } -/// OpenAI-compatible Thought for input (with optional index) +/// Extra content block for OpenAI-compatible input (with optional insert_index) #[derive(Clone, Debug, Deserialize, PartialEq)] -pub struct OpenAICompatibleInputThought { - pub index: Option, - #[serde(flatten)] - pub thought: Thought, +#[serde(tag = "type", rename_all = "snake_case")] +pub enum InputExtraContentBlock { + Thought { + insert_index: Option, + #[serde(flatten)] + thought: Thought, + }, + Unknown { + insert_index: Option, + #[serde(flatten)] + unknown: Unknown, + }, } // ============================================================================ @@ -579,29 +595,48 @@ pub fn openai_messages_to_input( message_content.push(InputMessageContent::ToolCall(tool_call.into())); } } - // Process reasoning content with index-based insertion - if let Some(thoughts) = msg.tensorzero_reasoning_content { - // Collect indexed thoughts (with explicit index) and unindexed thoughts separately - let mut indexed: Vec<(usize, Thought)> = Vec::new(); - let mut unindexed: Vec = Vec::new(); - for input_thought in thoughts { - match input_thought.index { - Some(idx) => indexed.push((idx, input_thought.thought)), - None => unindexed.push(input_thought.thought), + // Process extra content with index-based insertion + if let Some(extra_content) = msg.tensorzero_extra_content_experimental { + // First pass: items with insert_index (in input order) + for block in &extra_content { + match block { + InputExtraContentBlock::Thought { + insert_index: Some(idx), + thought, + } => { + let idx = (*idx).min(message_content.len()); + message_content + .insert(idx, InputMessageContent::Thought(thought.clone())); + } + InputExtraContentBlock::Unknown { + insert_index: Some(idx), + unknown, + } => { + let idx = (*idx).min(message_content.len()); + message_content + .insert(idx, InputMessageContent::Unknown(unknown.clone())); + } + _ => {} // Handle in second pass } } - // First, insert thoughts with explicit indices at their specified positions - // Sort by index to handle insertions correctly - indexed.sort_by_key(|(idx, _)| *idx); - for (idx, thought) in indexed { - let idx = idx.min(message_content.len()); - 
message_content.insert(idx, InputMessageContent::Thought(thought)); - } - - // Then, prepend all thoughts without an index at position 0 (reverse to maintain order) - for thought in unindexed.into_iter().rev() { - message_content.insert(0, InputMessageContent::Thought(thought)); + // Second pass: items without insert_index (append to end) + for block in extra_content { + match block { + InputExtraContentBlock::Thought { + insert_index: None, + thought, + } => { + message_content.push(InputMessageContent::Thought(thought)); + } + InputExtraContentBlock::Unknown { + insert_index: None, + unknown, + } => { + message_content.push(InputMessageContent::Unknown(unknown)); + } + _ => {} // Already handled + } } } messages.push(InputMessage { @@ -741,7 +776,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse ) -> Self { match inference_response { InferenceResponse::Chat(response) => { - let (content, tool_calls, thoughts) = process_chat_content(response.content); + let (content, tool_calls, extra_content) = process_chat_content(response.content); let tensorzero_original_response = if include_original_response { response.original_response } else { @@ -766,10 +801,10 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse Some(tool_calls) }, role: "assistant".to_string(), - tensorzero_reasoning_content: if thoughts.is_empty() { + tensorzero_extra_content_experimental: if extra_content.is_empty() { None } else { - Some(thoughts) + Some(extra_content) }, }, }], @@ -806,7 +841,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse content: response.output.raw, tool_calls: None, role: "assistant".to_string(), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, }], created: current_timestamp() as u32, @@ -826,19 +861,19 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse } /// Takes a vector of ContentBlockOutput and returns a tuple of -/// (Option, Vec, Vec). -/// This is useful since the OpenAI format separates text, tool calls, and reasoning in the response fields. +/// (Option, Vec, Vec). +/// This is useful since the OpenAI format separates text, tool calls, and extra content in the response fields. 
pub fn process_chat_content( content: Vec, ) -> ( Option, Vec, - Vec, + Vec, ) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); - let mut thoughts = Vec::new(); - for (index, block) in content.into_iter().enumerate() { + let mut extra_content = Vec::new(); + for (insert_index, block) in content.into_iter().enumerate() { match block { ContentBlockChatOutput::Text(text) => match content_str { Some(ref mut content) => content.push_str(&text.text), @@ -848,16 +883,20 @@ pub fn process_chat_content( tool_calls.push(tool_call.into()); } ContentBlockChatOutput::Thought(thought) => { - thoughts.push(OpenAICompatibleThought { index, thought }); + extra_content.push(ExtraContentBlock::Thought { + insert_index, + thought, + }); } - ContentBlockChatOutput::Unknown(_) => { - tracing::warn!( - "Ignoring 'unknown' content block when constructing OpenAI-compatible response" - ); + ContentBlockChatOutput::Unknown(unknown) => { + extra_content.push(ExtraContentBlock::Unknown { + insert_index, + unknown, + }); } } } - (content_str, tool_calls, thoughts) + (content_str, tool_calls, extra_content) } #[cfg(test)] @@ -926,7 +965,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -938,7 +977,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -950,7 +989,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::Tool(OpenAICompatibleToolMessage { content: Some(Value::String("Tool result 1".to_string())), @@ -1134,7 +1173,7 @@ mod tests { "city": "Tokyo", }])), tool_calls: None, - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1168,7 +1207,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1202,7 +1241,7 @@ mod tests { OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: Some(Value::String("Assistant message".to_string())), tool_calls: None, - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::System(OpenAICompatibleSystemMessage { content: Value::String("System message".to_string()), @@ -1692,7 +1731,7 @@ mod tests { } #[test] - fn test_process_chat_content_with_thoughts() { + fn test_process_chat_content_with_extra_content() { let content = vec![ ContentBlockChatOutput::Thought(Thought { text: Some("Let me think about this...".to_string()), @@ -1704,6 +1743,11 @@ mod tests { ContentBlockChatOutput::Text(Text { text: "Hello".to_string(), }), + ContentBlockChatOutput::Unknown(Unknown { + data: serde_json::json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }), ContentBlockChatOutput::Thought(Thought { text: Some("Another thought".to_string()), signature: None, @@ -1712,47 +1756,74 @@ mod tests { extra_data: None, }), ]; - let (content_str, tool_calls, thoughts) = process_chat_content(content); + let (content_str, tool_calls, 
extra_content) = process_chat_content(content); assert_eq!( content_str, Some("Hello".to_string()), "Text content should be extracted" ); assert!(tool_calls.is_empty(), "No tool calls expected"); - assert_eq!(thoughts.len(), 2, "Should have two thoughts"); - - // First thought at index 0 - assert_eq!(thoughts[0].index, 0, "First thought should have index 0"); - assert_eq!( - thoughts[0].thought.text, - Some("Let me think about this...".to_string()) - ); - assert_eq!(thoughts[0].thought.signature, Some("sig123".to_string())); assert_eq!( - thoughts[0].thought.provider_type, - Some("anthropic".to_string()) + extra_content.len(), + 3, + "Should have three extra content blocks" ); - // Second thought at index 2 - assert_eq!(thoughts[1].index, 2, "Second thought should have index 2"); - assert_eq!( - thoughts[1].thought.text, - Some("Another thought".to_string()) - ); - assert_eq!(thoughts[1].thought.signature, None); + // First: Thought at insert_index 0 + match &extra_content[0] { + ExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!(*insert_index, 0, "First thought should have insert_index 0"); + assert_eq!(thought.text, Some("Let me think about this...".to_string())); + assert_eq!(thought.signature, Some("sig123".to_string())); + assert_eq!(thought.provider_type, Some("anthropic".to_string())); + } + ExtraContentBlock::Unknown { .. } => panic!("Expected Thought at position 0"), + } + + // Second: Unknown at insert_index 2 + match &extra_content[1] { + ExtraContentBlock::Unknown { + insert_index, + unknown, + } => { + assert_eq!(*insert_index, 2, "Unknown should have insert_index 2"); + assert_eq!(unknown.data, serde_json::json!({"custom": "data"})); + assert_eq!(unknown.model_name, Some("test_model".to_string())); + } + ExtraContentBlock::Thought { .. } => panic!("Expected Unknown at position 1"), + } + + // Third: Thought at insert_index 3 + match &extra_content[2] { + ExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!( + *insert_index, 3, + "Second thought should have insert_index 3" + ); + assert_eq!(thought.text, Some("Another thought".to_string())); + assert_eq!(thought.signature, None); + } + ExtraContentBlock::Unknown { .. 
} => panic!("Expected Thought at position 2"), + } } #[test] - fn test_input_reasoning_content_ordering() { - // Test with indexed and unindexed thoughts + fn test_input_extra_content_ordering() { + // Test with indexed and unindexed extra content blocks let messages = vec![OpenAICompatibleMessage::Assistant( OpenAICompatibleAssistantMessage { content: Some(Value::String("Response text".to_string())), tool_calls: None, - tensorzero_reasoning_content: Some(vec![ - // Unindexed thought - should be prepended - OpenAICompatibleInputThought { - index: None, + tensorzero_extra_content_experimental: Some(vec![ + // Unindexed thought - should be appended + InputExtraContentBlock::Thought { + insert_index: None, thought: Thought { text: Some("Unindexed thought 1".to_string()), signature: None, @@ -1762,8 +1833,8 @@ mod tests { }, }, // Indexed thought at position 2 - OpenAICompatibleInputThought { - index: Some(2), + InputExtraContentBlock::Thought { + insert_index: Some(2), thought: Thought { text: Some("Indexed thought at 2".to_string()), signature: Some("sig456".to_string()), @@ -1772,20 +1843,18 @@ mod tests { extra_data: None, }, }, - // Another unindexed thought - should be prepended - OpenAICompatibleInputThought { - index: None, - thought: Thought { - text: Some("Unindexed thought 2".to_string()), - signature: None, - summary: None, - provider_type: None, - extra_data: None, + // Unindexed unknown - should be appended + InputExtraContentBlock::Unknown { + insert_index: None, + unknown: Unknown { + data: serde_json::json!({"test": "data"}), + model_name: None, + provider_name: None, }, }, // Indexed thought at position 0 - OpenAICompatibleInputThought { - index: Some(0), + InputExtraContentBlock::Thought { + insert_index: Some(0), thought: Thought { text: Some("Indexed thought at 0".to_string()), signature: None, @@ -1803,12 +1872,12 @@ mod tests { // Expected order based on the algorithm: // 1. Start with text content: ["Response text"] - // 2. Apply indexed thoughts (sorted by index): - // - Insert at 0: ["Indexed thought at 0", "Response text"] - // - Insert at 2 (capped to len=2): ["Indexed thought at 0", "Response text", "Indexed thought at 2"] - // 3. Prepend unindexed thoughts (reversed to maintain order): - // - Prepend "Unindexed thought 2": ["Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] - // - Prepend "Unindexed thought 1": ["Unindexed thought 1", "Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] + // 2. First pass - items with insert_index (in input order): + // - "Indexed thought at 2" insert at min(2, 1)=1: ["Response text", "Indexed at 2"] + // - "Indexed thought at 0" insert at min(0, 2)=0: ["Indexed at 0", "Response text", "Indexed at 2"] + // 3. 
Second pass - items without insert_index (append to end): + // - Append "Unindexed thought 1": ["Indexed at 0", "Response text", "Indexed at 2", "Unindexed 1"] + // - Append Unknown: ["Indexed at 0", "Response text", "Indexed at 2", "Unindexed 1", Unknown] assert_eq!(content.len(), 5, "Should have 5 content blocks"); // Verify content order @@ -1816,44 +1885,24 @@ mod tests { InputMessageContent::Thought(t) => { assert_eq!( t.text, - Some("Unindexed thought 1".to_string()), - "First should be unindexed thought 1" + Some("Indexed thought at 0".to_string()), + "First should be indexed thought at 0" ); } _ => panic!("Expected Thought at position 0"), } match &content[1] { - InputMessageContent::Thought(t) => { - assert_eq!( - t.text, - Some("Unindexed thought 2".to_string()), - "Second should be unindexed thought 2" - ); - } - _ => panic!("Expected Thought at position 1"), - } - match &content[2] { - InputMessageContent::Thought(t) => { - assert_eq!( - t.text, - Some("Indexed thought at 0".to_string()), - "Third should be indexed thought at 0" - ); - } - _ => panic!("Expected Thought at position 2"), - } - match &content[3] { InputMessageContent::Text(t) => { - assert_eq!(t.text, "Response text", "Fourth should be the text content"); + assert_eq!(t.text, "Response text", "Second should be the text content"); } - _ => panic!("Expected Text at position 3"), + _ => panic!("Expected Text at position 1"), } - match &content[4] { + match &content[2] { InputMessageContent::Thought(t) => { assert_eq!( t.text, Some("Indexed thought at 2".to_string()), - "Fifth should be indexed thought at 2" + "Third should be indexed thought at 2" ); assert_eq!( t.signature, @@ -1866,42 +1915,100 @@ mod tests { "Should preserve provider_type" ); } - _ => panic!("Expected Thought at position 4"), + _ => panic!("Expected Thought at position 2"), + } + match &content[3] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Unindexed thought 1".to_string()), + "Fourth should be unindexed thought 1 (appended)" + ); + } + _ => panic!("Expected Thought at position 3"), + } + match &content[4] { + InputMessageContent::Unknown(u) => { + assert_eq!( + u.data, + serde_json::json!({"test": "data"}), + "Fifth should be the unknown block (appended)" + ); + } + _ => panic!("Expected Unknown at position 4"), } } #[test] - fn test_openai_compatible_input_thought_deserialization() { - // Test that serde(flatten) works correctly for input thoughts - let json_with_index = json!({ - "index": 5, + fn test_input_extra_content_block_deserialization() { + // Test Thought variant deserialization with insert_index + let json_thought_with_index = json!({ + "type": "thought", + "insert_index": 5, "text": "My thought", "signature": "sig789", "provider_type": "anthropic" }); - let thought: OpenAICompatibleInputThought = - serde_json::from_value(json_with_index).unwrap(); - assert_eq!(thought.index, Some(5)); - assert_eq!(thought.thought.text, Some("My thought".to_string())); - assert_eq!(thought.thought.signature, Some("sig789".to_string())); - assert_eq!(thought.thought.provider_type, Some("anthropic".to_string())); - - // Test without index - let json_without_index = json!({ + let block: InputExtraContentBlock = + serde_json::from_value(json_thought_with_index).unwrap(); + match block { + InputExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!(insert_index, Some(5)); + assert_eq!(thought.text, Some("My thought".to_string())); + assert_eq!(thought.signature, Some("sig789".to_string())); + 
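                // Deserialization contract exercised here: the "type" tag selects the
                // enum variant, "insert_index" stays a variant-level field, and the
                // remaining keys are flattened into the inner `Thought` via #[serde(flatten)].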
assert_eq!(thought.provider_type, Some("anthropic".to_string())); + } + InputExtraContentBlock::Unknown { .. } => panic!("Expected Thought variant"), + } + + // Test Thought variant deserialization without insert_index + let json_thought_without_index = json!({ + "type": "thought", "text": "Another thought" }); - let thought: OpenAICompatibleInputThought = - serde_json::from_value(json_without_index).unwrap(); - assert_eq!(thought.index, None); - assert_eq!(thought.thought.text, Some("Another thought".to_string())); - assert_eq!(thought.thought.signature, None); + let block: InputExtraContentBlock = + serde_json::from_value(json_thought_without_index).unwrap(); + match block { + InputExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!(insert_index, None); + assert_eq!(thought.text, Some("Another thought".to_string())); + assert_eq!(thought.signature, None); + } + InputExtraContentBlock::Unknown { .. } => panic!("Expected Thought variant"), + } + + // Test Unknown variant deserialization + let json_unknown = json!({ + "type": "unknown", + "insert_index": 2, + "data": {"custom": "value"}, + "model_name": "test_model" + }); + let block: InputExtraContentBlock = serde_json::from_value(json_unknown).unwrap(); + match block { + InputExtraContentBlock::Unknown { + insert_index, + unknown, + } => { + assert_eq!(insert_index, Some(2)); + assert_eq!(unknown.data, json!({"custom": "value"})); + assert_eq!(unknown.model_name, Some("test_model".to_string())); + } + InputExtraContentBlock::Thought { .. } => panic!("Expected Unknown variant"), + } } #[test] - fn test_openai_compatible_thought_serialization() { - // Test that serde(flatten) works correctly for output thoughts - let thought = OpenAICompatibleThought { - index: 3, + fn test_extra_content_block_serialization() { + // Test Thought variant serialization + let thought_block = ExtraContentBlock::Thought { + insert_index: 3, thought: Thought { text: Some("Thinking...".to_string()), signature: Some("sig_abc".to_string()), @@ -1910,14 +2017,29 @@ mod tests { extra_data: None, }, }; - let json = serde_json::to_value(&thought).unwrap(); + let json = serde_json::to_value(&thought_block).unwrap(); - // With flatten, all fields should be at the top level - assert_eq!(json["index"], 3); + // With tagged enum + flatten, type tag and flattened fields at top level + assert_eq!(json["type"], "thought"); + assert_eq!(json["insert_index"], 3); assert_eq!(json["text"], "Thinking..."); assert_eq!(json["signature"], "sig_abc"); assert_eq!(json["provider_type"], "anthropic"); - // summary should not be present since it's None and has skip_serializing_if - assert!(json.get("summary").is_none() || json["summary"].is_null()); + + // Test Unknown variant serialization + let unknown_block = ExtraContentBlock::Unknown { + insert_index: 5, + unknown: Unknown { + data: json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }, + }; + let json = serde_json::to_value(&unknown_block).unwrap(); + + assert_eq!(json["type"], "unknown"); + assert_eq!(json["insert_index"], 5); + assert_eq!(json["data"], json!({"custom": "data"})); + assert_eq!(json["model_name"], "test_model"); } } diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs index b23fb6afdb..b2396bfd9c 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs @@ 
-11,7 +11,7 @@ use std::collections::HashMap; use tokio_stream::StreamExt; use crate::error::{Error, ErrorDetails}; -use crate::inference::types::streams::ThoughtChunk; +use crate::inference::types::streams::{ThoughtChunk, UnknownChunk}; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ContentBlockChunk, FinishReason, current_timestamp}; @@ -61,12 +61,20 @@ fn is_none_or_empty(v: &Option>) -> bool { v.as_ref().is_none_or(Vec::is_empty) } -/// OpenAI-compatible ThoughtChunk for streaming responses (with index for position in content array) +/// Extra content block chunk for streaming responses (Thought or Unknown) #[derive(Clone, Debug, PartialEq, Serialize)] -pub struct OpenAICompatibleThoughtChunk { - pub index: usize, - #[serde(flatten)] - pub chunk: ThoughtChunk, +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExtraContentBlockChunk { + Thought { + insert_index: usize, + #[serde(flatten)] + chunk: ThoughtChunk, + }, + Unknown { + insert_index: usize, + #[serde(flatten)] + chunk: UnknownChunk, + }, } #[derive(Clone, Debug, PartialEq, Serialize)] @@ -78,14 +86,14 @@ pub struct OpenAICompatibleDelta { #[serde(skip_serializing_if = "is_none_or_empty")] pub tool_calls: Option>, #[serde(skip_serializing_if = "is_none_or_empty")] - pub tensorzero_reasoning_content: Option>, + pub tensorzero_extra_content_experimental: Option>, } #[expect(clippy::too_many_arguments)] pub fn convert_inference_response_chunk_to_openai_compatible( chunk: InferenceResponseChunk, tool_id_to_index: &mut HashMap, - thought_id_to_index: &mut HashMap, + extra_id_to_index: &mut HashMap, response_model_prefix: &str, is_first_chunk: bool, include_usage: bool, @@ -102,8 +110,8 @@ pub fn convert_inference_response_chunk_to_openai_compatible( let response_chunk = match chunk { InferenceResponseChunk::Chat(c) => { - let (content, tool_calls, thoughts) = - process_chat_content_chunk(c.content, tool_id_to_index, thought_id_to_index); + let (content, tool_calls, extra_content) = + process_chat_content_chunk(c.content, tool_id_to_index, extra_id_to_index); let usage = if include_usage { c.usage.map(OpenAICompatibleUsage::from) } else { @@ -141,10 +149,10 @@ pub fn convert_inference_response_chunk_to_openai_compatible( } else { Some(tool_calls) }, - tensorzero_reasoning_content: if thoughts.is_empty() { + tensorzero_extra_content_experimental: if extra_content.is_empty() { None } else { - Some(thoughts) + Some(extra_content) }, }, }], @@ -194,7 +202,7 @@ pub fn convert_inference_response_chunk_to_openai_compatible( role, content: Some(c.raw), tool_calls: None, - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, }], created: current_timestamp() as u32, @@ -217,15 +225,15 @@ pub fn convert_inference_response_chunk_to_openai_compatible( pub fn process_chat_content_chunk( content: Vec, tool_id_to_index: &mut HashMap, - thought_id_to_index: &mut HashMap, + extra_id_to_index: &mut HashMap, ) -> ( Option, Vec, - Vec, + Vec, ) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); - let mut thoughts = Vec::new(); + let mut extra_content = Vec::new(); for block in content { match block { ContentBlockChunk::Text(text) => match content_str { @@ -246,24 +254,25 @@ pub fn process_chat_content_chunk( }, }); } - ContentBlockChunk::Thought(thought) => { - let len = thought_id_to_index.len(); - let index = *thought_id_to_index.entry(thought.id.clone()).or_insert(len); - thoughts.push(OpenAICompatibleThoughtChunk { - index, - chunk: 
thought, + ContentBlockChunk::Thought(chunk) => { + let len = extra_id_to_index.len(); + let insert_index = *extra_id_to_index.entry(chunk.id.clone()).or_insert(len); + extra_content.push(ExtraContentBlockChunk::Thought { + insert_index, + chunk, }); } - ContentBlockChunk::Unknown(_) => { - // OpenAI compatible endpoint does not support unknown blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'unknown' content block chunk when constructing OpenAI-compatible response" - ); + ContentBlockChunk::Unknown(chunk) => { + let len = extra_id_to_index.len(); + let insert_index = *extra_id_to_index.entry(chunk.id.clone()).or_insert(len); + extra_content.push(ExtraContentBlockChunk::Unknown { + insert_index, + chunk, + }); } } } - (content_str, tool_calls, thoughts) + (content_str, tool_calls, extra_content) } /// Prepares an Event for SSE on the way out of the gateway. @@ -279,7 +288,7 @@ pub fn prepare_serialized_openai_compatible_events( ) -> impl Stream> { async_stream::stream! { let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let mut is_first_chunk = true; while let Some(chunk) = stream.next().await { @@ -292,7 +301,7 @@ pub fn prepare_serialized_openai_compatible_events( let openai_compatible_chunks = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, &response_model_prefix, is_first_chunk, include_usage, @@ -345,11 +354,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -403,11 +412,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -452,11 +461,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -511,11 +520,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -553,11 +562,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -606,11 +615,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + 
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage = false
@@ -650,11 +659,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -699,11 +708,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -740,11 +749,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -784,23 +793,23 @@ mod tests {
             }),
         ];
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
-        let (content_str, tool_calls, thoughts) =
-            process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index);
+        let mut extra_id_to_index = HashMap::new();
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(content_str, Some("Hello, world!".to_string()));
         assert_eq!(tool_calls.len(), 1);
         assert_eq!(tool_calls[0].id, Some("1".to_string()));
         assert_eq!(tool_calls[0].index, 0);
         assert_eq!(tool_calls[0].function.name, "test_tool".to_string());
         assert_eq!(tool_calls[0].function.arguments, "{}");
-        assert!(thoughts.is_empty());
+        assert!(extra_content.is_empty());
 
         let content: Vec<ContentBlockChunk> = vec![];
-        let (content_str, tool_calls, thoughts) =
-            process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index);
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(content_str, None);
         assert!(tool_calls.is_empty());
-        assert!(thoughts.is_empty());
+        assert!(extra_content.is_empty());
 
         let content = vec![
             ContentBlockChunk::Text(TextChunk {
@@ -831,9 +840,9 @@ mod tests {
             }),
         ];
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
-        let (content_str, tool_calls, thoughts) =
-            process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index);
+        let mut extra_id_to_index = HashMap::new();
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(
             content_str,
             Some("First part second part third part fourth part".to_string())
         );
         assert_eq!(tool_calls[1].id, Some("2".to_string()));
         assert_eq!(tool_calls[1].index, 1);
         assert_eq!(tool_calls[1].function.name, "last_tool".to_string());
         assert_eq!(tool_calls[1].function.arguments, "{\"key\": \"value\"}");
-        assert!(thoughts.is_empty());
+        assert!(extra_content.is_empty());
     }
 
     #[test]
-    fn test_process_chat_content_chunk_with_thoughts() {
+    fn
test_process_chat_content_chunk_with_extra_content() { let content = vec![ ContentBlockChunk::Thought(ThoughtChunk { id: "thought_1".to_string(), @@ -866,6 +875,12 @@ mod tests { id: "text_1".to_string(), text: "Response text".to_string(), }), + ContentBlockChunk::Unknown(UnknownChunk { + id: "unknown_1".to_string(), + data: serde_json::json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }), ContentBlockChunk::Thought(ThoughtChunk { id: "thought_1".to_string(), // Same ID, new chunk text: Some(" more thinking".to_string()), @@ -887,9 +902,9 @@ mod tests { ]; let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); - let (content_str, tool_calls, thoughts) = - process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); + let mut extra_id_to_index = HashMap::new(); + let (content_str, tool_calls, extra_content) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index); assert_eq!( content_str, @@ -897,55 +912,93 @@ mod tests { "Text content should be extracted" ); assert!(tool_calls.is_empty(), "No tool calls expected"); - assert_eq!(thoughts.len(), 3, "Should have three thought chunks"); - - // First thought chunk assert_eq!( - thoughts[0].index, 0, - "First thought should have index 0 (assigned based on thought_1)" - ); - assert_eq!(thoughts[0].chunk.id, "thought_1"); - assert_eq!(thoughts[0].chunk.text, Some("Let me think...".to_string())); - assert_eq!(thoughts[0].chunk.signature, Some("sig123".to_string())); - assert_eq!( - thoughts[0].chunk.provider_type, - Some("anthropic".to_string()) + extra_content.len(), + 4, + "Should have four extra content chunks" ); - // Second thought chunk (same thought ID, should have same index) - assert_eq!( - thoughts[1].index, 0, - "Second chunk of thought_1 should still have index 0" - ); - assert_eq!(thoughts[1].chunk.id, "thought_1"); - assert_eq!(thoughts[1].chunk.text, Some(" more thinking".to_string())); + // First: Thought chunk + match &extra_content[0] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 0, "First thought should have insert_index 0"); + assert_eq!(chunk.id, "thought_1"); + assert_eq!(chunk.text, Some("Let me think...".to_string())); + assert_eq!(chunk.signature, Some("sig123".to_string())); + assert_eq!(chunk.provider_type, Some("anthropic".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 0"), + } - // Third thought chunk (new thought ID, should have new index) - assert_eq!(thoughts[2].index, 1, "New thought_2 should have index 1"); - assert_eq!(thoughts[2].chunk.id, "thought_2"); - assert_eq!( - thoughts[2].chunk.text, - Some("Different thought".to_string()) - ); + // Second: Unknown chunk + match &extra_content[1] { + ExtraContentBlockChunk::Unknown { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 1, "Unknown should have insert_index 1"); + assert_eq!(chunk.id, "unknown_1"); + assert_eq!(chunk.data, serde_json::json!({"custom": "data"})); + assert_eq!(chunk.model_name, Some("test_model".to_string())); + } + ExtraContentBlockChunk::Thought { .. 
} => panic!("Expected Unknown at position 1"), + } - // Verify the hashmap tracked the thought IDs correctly + // Third: Second Thought chunk (same ID, same insert_index) + match &extra_content[2] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!( + *insert_index, 0, + "Second chunk of thought_1 should still have insert_index 0" + ); + assert_eq!(chunk.id, "thought_1"); + assert_eq!(chunk.text, Some(" more thinking".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 2"), + } + + // Fourth: New Thought chunk (new ID, new insert_index) + match &extra_content[3] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 2, "New thought_2 should have insert_index 2"); + assert_eq!(chunk.id, "thought_2"); + assert_eq!(chunk.text, Some("Different thought".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 3"), + } + + // Verify the hashmap tracked the IDs correctly assert_eq!( - thought_id_to_index.get("thought_1"), + extra_id_to_index.get("thought_1"), Some(&0), "thought_1 should be mapped to index 0" ); assert_eq!( - thought_id_to_index.get("thought_2"), + extra_id_to_index.get("unknown_1"), Some(&1), - "thought_2 should be mapped to index 1" + "unknown_1 should be mapped to index 1" + ); + assert_eq!( + extra_id_to_index.get("thought_2"), + Some(&2), + "thought_2 should be mapped to index 2" ); } #[test] - fn test_openai_compatible_thought_chunk_serialization() { - // Test that serde(flatten) works correctly for streaming thought chunks - let chunk = OpenAICompatibleThoughtChunk { - index: 2, + fn test_extra_content_block_chunk_serialization() { + // Test Thought variant serialization + let thought_chunk = ExtraContentBlockChunk::Thought { + insert_index: 2, chunk: ThoughtChunk { id: "chunk_id".to_string(), text: Some("Thinking...".to_string()), @@ -956,15 +1009,34 @@ mod tests { extra_data: None, }, }; - let json = serde_json::to_value(&chunk).unwrap(); + let json = serde_json::to_value(&thought_chunk).unwrap(); - // With flatten, all fields should be at the top level - assert_eq!(json["index"], 2); + // With tagged enum + flatten, type tag and flattened fields at top level + assert_eq!(json["type"], "thought"); + assert_eq!(json["insert_index"], 2); assert_eq!(json["id"], "chunk_id"); assert_eq!(json["text"], "Thinking..."); assert_eq!(json["signature"], "sig_chunk"); assert_eq!(json["summary_id"], "summary_1"); assert_eq!(json["summary_text"], "Summary text"); assert_eq!(json["provider_type"], "anthropic"); + + // Test Unknown variant serialization + let unknown_chunk = ExtraContentBlockChunk::Unknown { + insert_index: 5, + chunk: UnknownChunk { + id: "unknown_id".to_string(), + data: serde_json::json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }, + }; + let json = serde_json::to_value(&unknown_chunk).unwrap(); + + assert_eq!(json["type"], "unknown"); + assert_eq!(json["insert_index"], 5); + assert_eq!(json["id"], "unknown_id"); + assert_eq!(json["data"], serde_json::json!({"custom": "data"})); + assert_eq!(json["model_name"], "test_model"); } }
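
For reviewers: the index-assignment idiom in process_chat_content_chunk above boils down to the HashMap entry API, where the first chunk with a given ID claims the next free index and every later chunk with the same ID reuses it. A minimal standalone sketch of that idiom (the helper name stable_index is hypothetical, not part of this patch):

use std::collections::HashMap;

// Hypothetical helper mirroring the idiom in the patch: `len` is captured
// before the `entry` call, so the first occurrence of an ID claims the next
// free slot and repeats of the same ID map to the same index.
fn stable_index(map: &mut HashMap<String, usize>, id: &str) -> usize {
    let len = map.len();
    *map.entry(id.to_string()).or_insert(len)
}

fn main() {
    let mut extra_id_to_index = HashMap::new();
    assert_eq!(stable_index(&mut extra_id_to_index, "thought_1"), 0);
    assert_eq!(stable_index(&mut extra_id_to_index, "unknown_1"), 1);
    assert_eq!(stable_index(&mut extra_id_to_index, "thought_1"), 0); // same ID, same index
    assert_eq!(stable_index(&mut extra_id_to_index, "thought_2"), 2);
}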
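
The serialization test above asserts a type tag, an insert_index field, and the chunk's own fields all at the top level of the JSON object. The actual ExtraContentBlockChunk declaration lives elsewhere in this patch; the sketch below (with a simplified stand-in chunk struct that omits fields like signature and summary_id) is only a guess at the serde attributes that would produce that shape:

use serde::Serialize;

// Simplified stand-in for the real ThoughtChunk; the real type carries more
// fields (signature, summary_id, summary_text, provider_type, ...).
#[derive(Serialize)]
struct ThoughtChunkSketch {
    id: String,
    text: Option<String>,
}

// Internally tagged enum: `type` serializes as "thought", and
// #[serde(flatten)] hoists the chunk's fields up next to insert_index.
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum ExtraContentBlockChunkSketch {
    Thought {
        insert_index: usize,
        #[serde(flatten)]
        chunk: ThoughtChunkSketch,
    },
}

fn main() {
    let chunk = ExtraContentBlockChunkSketch::Thought {
        insert_index: 2,
        chunk: ThoughtChunkSketch {
            id: "chunk_id".to_string(),
            text: Some("Thinking...".to_string()),
        },
    };
    // Prints: {"type":"thought","insert_index":2,"id":"chunk_id","text":"Thinking..."}
    println!("{}", serde_json::to_string(&chunk).unwrap());
}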
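
On the consuming side, insert_index tells a client where each extra block sits relative to the others, while the chunk ID groups the deltas that belong to the same block, so streamed reasoning content can be folded back into ordered blocks. A hypothetical client-side helper (not part of this patch), exercised with the same data as the streaming test above:

use std::collections::BTreeMap;

// Hypothetical reassembly helper: fold streamed (insert_index, text) pairs
// back into ordered blocks by concatenating text per index.
fn reassemble(chunks: &[(usize, &str)]) -> Vec<String> {
    let mut by_index: BTreeMap<usize, String> = BTreeMap::new();
    for (insert_index, text) in chunks {
        by_index.entry(*insert_index).or_default().push_str(text);
    }
    by_index.into_values().collect()
}

fn main() {
    let streamed = [(0, "Let me think..."), (0, " more thinking"), (2, "Different thought")];
    assert_eq!(
        reassemble(&streamed),
        vec![
            "Let me think... more thinking".to_string(),
            "Different thought".to_string(),
        ]
    );
}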