diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs
index 4b15e42cb1..145653406f 100644
--- a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs
+++ b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs
@@ -34,7 +34,7 @@ use crate::inference::types::extra_headers::UnfilteredInferenceExtraHeaders;
 use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry};
 use crate::inference::types::{
     Arguments, ContentBlockChatOutput, FinishReason, Input, InputMessage, InputMessageContent,
-    RawText, Role, System, Template, Text, current_timestamp,
+    RawText, Role, System, Template, Text, Thought, Unknown, current_timestamp,
 };
 use crate::tool::{DynamicToolParams, ProviderTool, ToolResult};
 use crate::variant::JsonMode;
@@ -57,6 +57,8 @@ pub struct OpenAICompatibleUserMessage
 pub struct OpenAICompatibleAssistantMessage {
     pub content: Option<Value>,
     pub tool_calls: Option<Vec<OpenAICompatibleToolCall>>,
+    #[serde(default)]
+    pub tensorzero_extra_content_experimental: Option<Vec<InputExtraContentBlock>>,
 }
 
 #[derive(Clone, Debug, PartialEq, TensorZeroDeserialize)]
@@ -170,8 +172,11 @@ pub struct OpenAICompatibleParams
 #[derive(Clone, Debug, PartialEq, Serialize)]
 pub struct OpenAICompatibleResponseMessage {
     pub content: Option<String>,
+    #[serde(skip_serializing_if = "is_none_or_empty")]
     pub tool_calls: Option<Vec<OpenAICompatibleToolCall>>,
     pub role: String,
+    #[serde(skip_serializing_if = "is_none_or_empty")]
+    pub tensorzero_extra_content_experimental: Option<Vec<ExtraContentBlock>>,
 }
 
 #[derive(Clone, Debug, PartialEq, Serialize)]
@@ -224,6 +229,44 @@ pub struct OpenAICompatibleResponse {
     pub tensorzero_raw_response: Option<Vec<RawResponseEntry>>,
 }
 
+// Signature dictated by Serde
+#[expect(clippy::ref_option)]
+fn is_none_or_empty<T>(v: &Option<Vec<T>>) -> bool {
+    v.as_ref().is_none_or(Vec::is_empty)
+}
+
+/// Extra content block for OpenAI-compatible responses (Thought or Unknown)
+#[derive(Clone, Debug, PartialEq, Serialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ExtraContentBlock {
+    Thought {
+        insert_index: usize,
+        #[serde(flatten)]
+        thought: Thought,
+    },
+    Unknown {
+        insert_index: usize,
+        #[serde(flatten)]
+        unknown: Unknown,
+    },
+}
+
+/// Extra content block for OpenAI-compatible input (with optional insert_index)
+#[derive(Clone, Debug, Deserialize, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum InputExtraContentBlock {
+    Thought {
+        insert_index: Option<usize>,
+        #[serde(flatten)]
+        thought: Thought,
+    },
+    Unknown {
+        insert_index: Option<usize>,
+        #[serde(flatten)]
+        unknown: Unknown,
+    },
+}
+
 // ============================================================================
 // Content Block Types
 // ============================================================================
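For orientation, the request-side wire shape these derives imply looks roughly like the following. This is a minimal, self-contained sketch using serde_json: the message and all field values are hypothetical, and only the shape (the "type" tag, the flattened Thought fields, the optional insert_index) is taken from the definitions above.

// Sketch of the request JSON implied by the Deserialize derives above.
// Values are illustrative, not part of the API.
use serde_json::json;

fn main() {
    let assistant_message = json!({
        "role": "assistant",
        "content": "The answer is 4.",
        "tensorzero_extra_content_experimental": [{
            "type": "thought",
            "insert_index": 0,
            "text": "2 + 2 = 4",
            "signature": "sig123"
        }]
    });
    println!("{assistant_message:#}");
}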
@@ -552,6 +595,50 @@ pub fn openai_messages_to_input(
                     message_content.push(InputMessageContent::ToolCall(tool_call.into()));
                 }
             }
+            // Process extra content with index-based insertion
+            if let Some(extra_content) = msg.tensorzero_extra_content_experimental {
+                // First pass: items with insert_index (in input order)
+                for block in &extra_content {
+                    match block {
+                        InputExtraContentBlock::Thought {
+                            insert_index: Some(idx),
+                            thought,
+                        } => {
+                            let idx = (*idx).min(message_content.len());
+                            message_content
+                                .insert(idx, InputMessageContent::Thought(thought.clone()));
+                        }
+                        InputExtraContentBlock::Unknown {
+                            insert_index: Some(idx),
+                            unknown,
+                        } => {
+                            let idx = (*idx).min(message_content.len());
+                            message_content
+                                .insert(idx, InputMessageContent::Unknown(unknown.clone()));
+                        }
+                        _ => {} // Handle in second pass
+                    }
+                }
+
+                // Second pass: items without insert_index (append to end)
+                for block in extra_content {
+                    match block {
+                        InputExtraContentBlock::Thought {
+                            insert_index: None,
+                            thought,
+                        } => {
+                            message_content.push(InputMessageContent::Thought(thought));
+                        }
+                        InputExtraContentBlock::Unknown {
+                            insert_index: None,
+                            unknown,
+                        } => {
+                            message_content.push(InputMessageContent::Unknown(unknown));
+                        }
+                        _ => {} // Already handled
+                    }
+                }
+            }
             messages.push(InputMessage {
                 role: Role::Assistant,
                 content: message_content,
@@ -689,7 +776,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse
     ) -> Self {
         match inference_response {
             InferenceResponse::Chat(response) => {
-                let (content, tool_calls) = process_chat_content(response.content);
+                let (content, tool_calls, extra_content) = process_chat_content(response.content);
                 let tensorzero_original_response = if include_original_response {
                     response.original_response
                 } else {
@@ -708,8 +795,17 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse
                     finish_reason: response.finish_reason.unwrap_or(FinishReason::Stop).into(),
                     message: OpenAICompatibleResponseMessage {
                         content,
-                        tool_calls: Some(tool_calls),
+                        tool_calls: if tool_calls.is_empty() {
+                            None
+                        } else {
+                            Some(tool_calls)
+                        },
                         role: "assistant".to_string(),
+                        tensorzero_extra_content_experimental: if extra_content.is_empty() {
+                            None
+                        } else {
+                            Some(extra_content)
+                        },
                     },
                 }],
                 created: current_timestamp() as u32,
@@ -745,6 +841,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse
                         content: response.output.raw,
                         tool_calls: None,
                         role: "assistant".to_string(),
+                        tensorzero_extra_content_experimental: None,
                     },
                 }],
                 created: current_timestamp() as u32,
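Conversely, a non-streaming response message that surfaced a thought would serialize along these lines. This is a sketch of the shape implied by the Serialize derive on ExtraContentBlock; all values are illustrative, and insert_index 0 records that the thought preceded the text in the original output.

use serde_json::json;

fn main() {
    // Hypothetical response message: the thought came before the text block,
    // so it is reported with insert_index 0.
    let message = json!({
        "role": "assistant",
        "content": "Hello!",
        "tensorzero_extra_content_experimental": [{
            "type": "thought",
            "insert_index": 0,
            "text": "Greet the user."
        }]
    });
    println!("{message:#}");
}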
@@ -763,14 +860,20 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse
         }
     }
 }
 
-/// Takes a vector of ContentBlockOutput and returns a tuple of (Option<String>, Vec<OpenAICompatibleToolCall>).
-/// This is useful since the OpenAI format separates text and tool calls in the response fields.
+/// Takes a vector of ContentBlockChatOutput and returns a tuple of
+/// (Option<String>, Vec<OpenAICompatibleToolCall>, Vec<ExtraContentBlock>).
+/// This is useful since the OpenAI format separates text, tool calls, and extra content in the response fields.
 pub fn process_chat_content(
     content: Vec<ContentBlockChatOutput>,
-) -> (Option<String>, Vec<OpenAICompatibleToolCall>) {
+) -> (
+    Option<String>,
+    Vec<OpenAICompatibleToolCall>,
+    Vec<ExtraContentBlock>,
+) {
     let mut content_str: Option<String> = None;
     let mut tool_calls = Vec::new();
-    for block in content {
+    let mut extra_content = Vec::new();
+    for (insert_index, block) in content.into_iter().enumerate() {
         match block {
             ContentBlockChatOutput::Text(text) => match content_str {
                 Some(ref mut content) => content.push_str(&text.text),
@@ -779,21 +882,21 @@ pub fn process_chat_content(
             ContentBlockChatOutput::ToolCall(tool_call) => {
                 tool_calls.push(tool_call.into());
             }
-            ContentBlockChatOutput::Thought(_thought) => {
-                // OpenAI compatible endpoint does not support thought blocks
-                // Users of this endpoint will need to check observability to see them
-                tracing::warn!(
-                    "Ignoring 'thought' content block when constructing OpenAI-compatible response"
-                );
+            ContentBlockChatOutput::Thought(thought) => {
+                extra_content.push(ExtraContentBlock::Thought {
+                    insert_index,
+                    thought,
+                });
             }
-            ContentBlockChatOutput::Unknown(_) => {
-                tracing::warn!(
-                    "Ignoring 'unknown' content block when constructing OpenAI-compatible response"
-                );
+            ContentBlockChatOutput::Unknown(unknown) => {
+                extra_content.push(ExtraContentBlock::Unknown {
+                    insert_index,
+                    unknown,
+                });
             }
         }
     }
-    (content_str, tool_calls)
+    (content_str, tool_calls, extra_content)
 }
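Since each displaced block records where it originally sat, a client can splice the response back together. The following is a minimal sketch over plain JSON that mirrors the insertion logic above; the helper name is hypothetical, and positions become approximate once several text blocks have been merged into one content string.

use serde_json::{Value, json};

// A rough client-side inverse of `process_chat_content`: splice extra content
// back into the text at each block's recorded insert_index. Plain JSON keeps
// the sketch self-contained; a real client would use typed structs.
fn splice_extra_content(content: &str, extra: &[Value]) -> Vec<Value> {
    let mut blocks = vec![json!({ "type": "text", "text": content })];
    for block in extra {
        let idx = block["insert_index"].as_u64().unwrap_or(0) as usize;
        blocks.insert(idx.min(blocks.len()), block.clone());
    }
    blocks
}

fn main() {
    let extra = vec![json!({ "type": "thought", "insert_index": 0, "text": "hmm" })];
    let blocks = splice_extra_content("Hello!", &extra);
    assert_eq!(blocks[0]["type"], "thought");
    assert_eq!(blocks[1]["type"], "text");
}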
 
 #[cfg(test)]
@@ -862,6 +965,7 @@ mod tests {
                     arguments: "{}".to_string(),
                 },
             }]),
+            tensorzero_extra_content_experimental: None,
         }),
         OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage {
             content: None,
@@ -873,6 +977,7 @@ mod tests {
                     arguments: "{}".to_string(),
                 },
             }]),
+            tensorzero_extra_content_experimental: None,
         }),
         OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage {
             content: None,
@@ -884,6 +989,7 @@ mod tests {
                     arguments: "{}".to_string(),
                 },
             }]),
+            tensorzero_extra_content_experimental: None,
         }),
         OpenAICompatibleMessage::Tool(OpenAICompatibleToolMessage {
             content: Some(Value::String("Tool result 1".to_string())),
@@ -1067,6 +1173,7 @@ mod tests {
                     "city": "Tokyo",
                 }])),
                 tool_calls: None,
+                tensorzero_extra_content_experimental: None,
            },
        )];
        let input: Input = openai_messages_to_input(messages).unwrap();
@@ -1100,6 +1207,7 @@ mod tests {
                        arguments: "{}".to_string(),
                    },
                }]),
+                tensorzero_extra_content_experimental: None,
            },
        )];
        let input: Input = openai_messages_to_input(messages).unwrap();
@@ -1133,6 +1241,7 @@ mod tests {
        OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage {
            content: Some(Value::String("Assistant message".to_string())),
            tool_calls: None,
+            tensorzero_extra_content_experimental: None,
        }),
        OpenAICompatibleMessage::System(OpenAICompatibleSystemMessage {
            content: Value::String("System message".to_string()),
@@ -1495,16 +1604,19 @@ mod tests {
                 text: ", world!".to_string(),
             }),
         ];
-        let (content_str, tool_calls) = process_chat_content(content);
+        let (content_str, tool_calls, extra_content) = process_chat_content(content);
         assert_eq!(content_str, Some("Hello, world!".to_string()));
         assert_eq!(tool_calls.len(), 1);
         assert_eq!(tool_calls[0].id, "1");
         assert_eq!(tool_calls[0].function.name, "test_tool");
         assert_eq!(tool_calls[0].function.arguments, "{}");
+        assert!(extra_content.is_empty());
+
         let content: Vec<ContentBlockChatOutput> = vec![];
-        let (content_str, tool_calls) = process_chat_content(content);
+        let (content_str, tool_calls, extra_content) = process_chat_content(content);
         assert_eq!(content_str, None);
         assert!(tool_calls.is_empty());
+        assert!(extra_content.is_empty());
 
         let content = vec![
             ContentBlockChatOutput::Text(Text {
@@ -1527,7 +1639,7 @@ mod tests {
                 text: " fourth part".to_string(),
             }),
         ];
-        let (content_str, tool_calls) = process_chat_content(content);
+        let (content_str, tool_calls, extra_content) = process_chat_content(content);
         assert_eq!(
             content_str,
             Some("First part second part third part fourth part".to_string())
@@ -1536,6 +1648,7 @@ mod tests {
         assert_eq!(tool_calls[0].id, "123");
         assert_eq!(tool_calls[0].function.name, "middle_tool");
         assert_eq!(tool_calls[0].function.arguments, "{\"key\": \"value\"}");
+        assert!(extra_content.is_empty());
     }
 
     #[test]
@@ -1616,4 +1729,317 @@ mod tests {
             }
         );
     }
+
+    #[test]
+    fn test_process_chat_content_with_extra_content() {
+        let content = vec![
+            ContentBlockChatOutput::Thought(Thought {
+                text: Some("Let me think about this...".to_string()),
+                signature: Some("sig123".to_string()),
+                summary: None,
+                provider_type: Some("anthropic".to_string()),
+                extra_data: None,
+            }),
+            ContentBlockChatOutput::Text(Text {
+                text: "Hello".to_string(),
+            }),
+            ContentBlockChatOutput::Unknown(Unknown {
+                data: serde_json::json!({"custom": "data"}),
+                model_name: Some("test_model".to_string()),
+                provider_name: None,
+            }),
+            ContentBlockChatOutput::Thought(Thought {
+                text: Some("Another thought".to_string()),
+                signature: None,
+                summary: None,
+                provider_type: None,
+                extra_data: None,
+            }),
+        ];
+        let (content_str, tool_calls, extra_content) = process_chat_content(content);
+        assert_eq!(
+            content_str,
+            Some("Hello".to_string()),
+            "Text content should be extracted"
+        );
+        assert!(tool_calls.is_empty(), "No tool calls expected");
+        assert_eq!(
+            extra_content.len(),
+            3,
+            "Should have three extra content blocks"
+        );
+
+        // First: Thought at insert_index 0
+        match &extra_content[0] {
+            ExtraContentBlock::Thought {
+                insert_index,
+                thought,
+            } => {
+                assert_eq!(*insert_index, 0, "First thought should have insert_index 0");
+                assert_eq!(thought.text, Some("Let me think about this...".to_string()));
+                assert_eq!(thought.signature, Some("sig123".to_string()));
+                assert_eq!(thought.provider_type, Some("anthropic".to_string()));
+            }
+            ExtraContentBlock::Unknown { .. } => panic!("Expected Thought at position 0"),
+        }
+
+        // Second: Unknown at insert_index 2
+        match &extra_content[1] {
+            ExtraContentBlock::Unknown {
+                insert_index,
+                unknown,
+            } => {
+                assert_eq!(*insert_index, 2, "Unknown should have insert_index 2");
+                assert_eq!(unknown.data, serde_json::json!({"custom": "data"}));
+                assert_eq!(unknown.model_name, Some("test_model".to_string()));
+            }
+            ExtraContentBlock::Thought { .. } => panic!("Expected Unknown at position 1"),
+        }
+
+        // Third: Thought at insert_index 3
+        match &extra_content[2] {
+            ExtraContentBlock::Thought {
+                insert_index,
+                thought,
+            } => {
+                assert_eq!(
+                    *insert_index, 3,
+                    "Second thought should have insert_index 3"
+                );
+                assert_eq!(thought.text, Some("Another thought".to_string()));
+                assert_eq!(thought.signature, None);
+            }
+            ExtraContentBlock::Unknown { .. } => panic!("Expected Thought at position 2"),
} => panic!("Expected Thought at position 2"), + } + } + + #[test] + fn test_input_extra_content_ordering() { + // Test with indexed and unindexed extra content blocks + let messages = vec![OpenAICompatibleMessage::Assistant( + OpenAICompatibleAssistantMessage { + content: Some(Value::String("Response text".to_string())), + tool_calls: None, + tensorzero_extra_content_experimental: Some(vec![ + // Unindexed thought - should be appended + InputExtraContentBlock::Thought { + insert_index: None, + thought: Thought { + text: Some("Unindexed thought 1".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + // Indexed thought at position 2 + InputExtraContentBlock::Thought { + insert_index: Some(2), + thought: Thought { + text: Some("Indexed thought at 2".to_string()), + signature: Some("sig456".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }, + // Unindexed unknown - should be appended + InputExtraContentBlock::Unknown { + insert_index: None, + unknown: Unknown { + data: serde_json::json!({"test": "data"}), + model_name: None, + provider_name: None, + }, + }, + // Indexed thought at position 0 + InputExtraContentBlock::Thought { + insert_index: Some(0), + thought: Thought { + text: Some("Indexed thought at 0".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + ]), + }, + )]; + let input: Input = openai_messages_to_input(messages).unwrap(); + assert_eq!(input.messages.len(), 1, "Should have one message"); + let content = &input.messages[0].content; + + // Expected order based on the algorithm: + // 1. Start with text content: ["Response text"] + // 2. First pass - items with insert_index (in input order): + // - "Indexed thought at 2" insert at min(2, 1)=1: ["Response text", "Indexed at 2"] + // - "Indexed thought at 0" insert at min(0, 2)=0: ["Indexed at 0", "Response text", "Indexed at 2"] + // 3. 
+        //    - Append "Unindexed thought 1": ["Indexed at 0", "Response text", "Indexed at 2", "Unindexed 1"]
+        //    - Append Unknown: ["Indexed at 0", "Response text", "Indexed at 2", "Unindexed 1", Unknown]
+        assert_eq!(content.len(), 5, "Should have 5 content blocks");
+
+        // Verify content order
+        match &content[0] {
+            InputMessageContent::Thought(t) => {
+                assert_eq!(
+                    t.text,
+                    Some("Indexed thought at 0".to_string()),
+                    "First should be indexed thought at 0"
+                );
+            }
+            _ => panic!("Expected Thought at position 0"),
+        }
+        match &content[1] {
+            InputMessageContent::Text(t) => {
+                assert_eq!(t.text, "Response text", "Second should be the text content");
+            }
+            _ => panic!("Expected Text at position 1"),
+        }
+        match &content[2] {
+            InputMessageContent::Thought(t) => {
+                assert_eq!(
+                    t.text,
+                    Some("Indexed thought at 2".to_string()),
+                    "Third should be indexed thought at 2"
+                );
+                assert_eq!(
+                    t.signature,
+                    Some("sig456".to_string()),
+                    "Should preserve signature"
+                );
+                assert_eq!(
+                    t.provider_type,
+                    Some("anthropic".to_string()),
+                    "Should preserve provider_type"
+                );
+            }
+            _ => panic!("Expected Thought at position 2"),
+        }
+        match &content[3] {
+            InputMessageContent::Thought(t) => {
+                assert_eq!(
+                    t.text,
+                    Some("Unindexed thought 1".to_string()),
+                    "Fourth should be unindexed thought 1 (appended)"
+                );
+            }
+            _ => panic!("Expected Thought at position 3"),
+        }
+        match &content[4] {
+            InputMessageContent::Unknown(u) => {
+                assert_eq!(
+                    u.data,
+                    serde_json::json!({"test": "data"}),
+                    "Fifth should be the unknown block (appended)"
+                );
+            }
+            _ => panic!("Expected Unknown at position 4"),
+        }
+    }
+
+    #[test]
+    fn test_input_extra_content_block_deserialization() {
+        // Test Thought variant deserialization with insert_index
+        let json_thought_with_index = json!({
+            "type": "thought",
+            "insert_index": 5,
+            "text": "My thought",
+            "signature": "sig789",
+            "provider_type": "anthropic"
+        });
+        let block: InputExtraContentBlock =
+            serde_json::from_value(json_thought_with_index).unwrap();
+        match block {
+            InputExtraContentBlock::Thought {
+                insert_index,
+                thought,
+            } => {
+                assert_eq!(insert_index, Some(5));
+                assert_eq!(thought.text, Some("My thought".to_string()));
+                assert_eq!(thought.signature, Some("sig789".to_string()));
+                assert_eq!(thought.provider_type, Some("anthropic".to_string()));
+            }
+            InputExtraContentBlock::Unknown { .. } => panic!("Expected Thought variant"),
+        }
+
+        // Test Thought variant deserialization without insert_index
+        let json_thought_without_index = json!({
+            "type": "thought",
+            "text": "Another thought"
+        });
+        let block: InputExtraContentBlock =
+            serde_json::from_value(json_thought_without_index).unwrap();
+        match block {
+            InputExtraContentBlock::Thought {
+                insert_index,
+                thought,
+            } => {
+                assert_eq!(insert_index, None);
+                assert_eq!(thought.text, Some("Another thought".to_string()));
+                assert_eq!(thought.signature, None);
+            }
+            InputExtraContentBlock::Unknown { .. } => panic!("Expected Thought variant"),
} => panic!("Expected Thought variant"), + } + + // Test Unknown variant deserialization + let json_unknown = json!({ + "type": "unknown", + "insert_index": 2, + "data": {"custom": "value"}, + "model_name": "test_model" + }); + let block: InputExtraContentBlock = serde_json::from_value(json_unknown).unwrap(); + match block { + InputExtraContentBlock::Unknown { + insert_index, + unknown, + } => { + assert_eq!(insert_index, Some(2)); + assert_eq!(unknown.data, json!({"custom": "value"})); + assert_eq!(unknown.model_name, Some("test_model".to_string())); + } + InputExtraContentBlock::Thought { .. } => panic!("Expected Unknown variant"), + } + } + + #[test] + fn test_extra_content_block_serialization() { + // Test Thought variant serialization + let thought_block = ExtraContentBlock::Thought { + insert_index: 3, + thought: Thought { + text: Some("Thinking...".to_string()), + signature: Some("sig_abc".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }; + let json = serde_json::to_value(&thought_block).unwrap(); + + // With tagged enum + flatten, type tag and flattened fields at top level + assert_eq!(json["type"], "thought"); + assert_eq!(json["insert_index"], 3); + assert_eq!(json["text"], "Thinking..."); + assert_eq!(json["signature"], "sig_abc"); + assert_eq!(json["provider_type"], "anthropic"); + + // Test Unknown variant serialization + let unknown_block = ExtraContentBlock::Unknown { + insert_index: 5, + unknown: Unknown { + data: json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }, + }; + let json = serde_json::to_value(&unknown_block).unwrap(); + + assert_eq!(json["type"], "unknown"); + assert_eq!(json["insert_index"], 5); + assert_eq!(json["data"], json!({"custom": "data"})); + assert_eq!(json["model_name"], "test_model"); + } } diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs index 41a599f677..b2396bfd9c 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use tokio_stream::StreamExt; use crate::error::{Error, ErrorDetails}; +use crate::inference::types::streams::{ThoughtChunk, UnknownChunk}; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ContentBlockChunk, FinishReason, current_timestamp}; @@ -60,6 +61,22 @@ fn is_none_or_empty(v: &Option>) -> bool { v.as_ref().is_none_or(Vec::is_empty) } +/// Extra content block chunk for streaming responses (Thought or Unknown) +#[derive(Clone, Debug, PartialEq, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExtraContentBlockChunk { + Thought { + insert_index: usize, + #[serde(flatten)] + chunk: ThoughtChunk, + }, + Unknown { + insert_index: usize, + #[serde(flatten)] + chunk: UnknownChunk, + }, +} + #[derive(Clone, Debug, PartialEq, Serialize)] pub struct OpenAICompatibleDelta { #[serde(skip_serializing_if = "Option::is_none")] @@ -68,12 +85,15 @@ pub struct OpenAICompatibleDelta { pub content: Option, #[serde(skip_serializing_if = "is_none_or_empty")] pub tool_calls: Option>, + #[serde(skip_serializing_if = "is_none_or_empty")] + pub tensorzero_extra_content_experimental: Option>, } #[expect(clippy::too_many_arguments)] pub fn convert_inference_response_chunk_to_openai_compatible( chunk: InferenceResponseChunk, 
tool_id_to_index: &mut HashMap, + extra_id_to_index: &mut HashMap, response_model_prefix: &str, is_first_chunk: bool, include_usage: bool, @@ -90,7 +110,8 @@ pub fn convert_inference_response_chunk_to_openai_compatible( let response_chunk = match chunk { InferenceResponseChunk::Chat(c) => { - let (content, tool_calls) = process_chat_content_chunk(c.content, tool_id_to_index); + let (content, tool_calls, extra_content) = + process_chat_content_chunk(c.content, tool_id_to_index, extra_id_to_index); let usage = if include_usage { c.usage.map(OpenAICompatibleUsage::from) } else { @@ -123,7 +144,16 @@ pub fn convert_inference_response_chunk_to_openai_compatible( delta: OpenAICompatibleDelta { role: role.clone(), content, - tool_calls: Some(tool_calls), + tool_calls: if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }, + tensorzero_extra_content_experimental: if extra_content.is_empty() { + None + } else { + Some(extra_content) + }, }, }], created: current_timestamp() as u32, @@ -172,6 +202,7 @@ pub fn convert_inference_response_chunk_to_openai_compatible( role, content: Some(c.raw), tool_calls: None, + tensorzero_extra_content_experimental: None, }, }], created: current_timestamp() as u32, @@ -194,9 +225,15 @@ pub fn convert_inference_response_chunk_to_openai_compatible( pub fn process_chat_content_chunk( content: Vec, tool_id_to_index: &mut HashMap, -) -> (Option, Vec) { + extra_id_to_index: &mut HashMap, +) -> ( + Option, + Vec, + Vec, +) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); + let mut extra_content = Vec::new(); for block in content { match block { ContentBlockChunk::Text(text) => match content_str { @@ -217,23 +254,25 @@ pub fn process_chat_content_chunk( }, }); } - ContentBlockChunk::Thought(_thought) => { - // OpenAI compatible endpoint does not support thought blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'thought' content block chunk when constructing OpenAI-compatible response" - ); + ContentBlockChunk::Thought(chunk) => { + let len = extra_id_to_index.len(); + let insert_index = *extra_id_to_index.entry(chunk.id.clone()).or_insert(len); + extra_content.push(ExtraContentBlockChunk::Thought { + insert_index, + chunk, + }); } - ContentBlockChunk::Unknown(_) => { - // OpenAI compatible endpoint does not support unknown blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'unknown' content block chunk when constructing OpenAI-compatible response" - ); + ContentBlockChunk::Unknown(chunk) => { + let len = extra_id_to_index.len(); + let insert_index = *extra_id_to_index.entry(chunk.id.clone()).or_insert(len); + extra_content.push(ExtraContentBlockChunk::Unknown { + insert_index, + chunk, + }); } } } - (content_str, tool_calls) + (content_str, tool_calls, extra_content) } /// Prepares an Event for SSE on the way out of the gateway. @@ -249,6 +288,7 @@ pub fn prepare_serialized_openai_compatible_events( ) -> impl Stream> { async_stream::stream! 
 
 /// Prepares an Event for SSE on the way out of the gateway.
@@ -249,6 +288,7 @@ pub fn prepare_serialized_openai_compatible_events(
 ) -> impl Stream<Item = Result<Event, Error>> {
     async_stream::stream! {
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let mut is_first_chunk = true;
 
         while let Some(chunk) = stream.next().await {
@@ -261,6 +301,7 @@ pub fn prepare_serialized_openai_compatible_events(
             let openai_compatible_chunks = convert_inference_response_chunk_to_openai_compatible(
                 chunk,
                 &mut tool_id_to_index,
+                &mut extra_id_to_index,
                 &response_model_prefix,
                 is_first_chunk,
                 include_usage,
@@ -313,9 +354,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             true, // include_usage
@@ -369,9 +412,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             true, // include_usage
@@ -416,9 +461,11 @@ mod tests {
        });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             true, // include_usage
@@ -473,9 +520,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             true, // include_usage
@@ -513,9 +562,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             true, // include_usage
@@ -564,9 +615,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage = false
@@ -606,9 +659,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -653,9 +708,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -692,9 +749,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -734,18 +793,23 @@ mod tests {
             }),
         ];
         let mut tool_id_to_index = HashMap::new();
-        let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index);
+        let mut extra_id_to_index = HashMap::new();
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(content_str, Some("Hello, world!".to_string()));
         assert_eq!(tool_calls.len(), 1);
         assert_eq!(tool_calls[0].id, Some("1".to_string()));
         assert_eq!(tool_calls[0].index, 0);
         assert_eq!(tool_calls[0].function.name, "test_tool".to_string());
         assert_eq!(tool_calls[0].function.arguments, "{}");
+        assert!(extra_content.is_empty());
 
         let content: Vec<ContentBlockChunk> = vec![];
-        let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index);
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(content_str, None);
         assert!(tool_calls.is_empty());
+        assert!(extra_content.is_empty());
 
         let content = vec![
             ContentBlockChunk::Text(TextChunk {
@@ -776,7 +840,9 @@ mod tests {
             }),
         ];
         let mut tool_id_to_index = HashMap::new();
-        let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index);
+        let mut extra_id_to_index = HashMap::new();
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(
             content_str,
             Some("First part second part third part fourth part".to_string())
@@ -790,5 +856,187 @@ mod tests {
         assert_eq!(tool_calls[1].index, 1);
         assert_eq!(tool_calls[1].function.name, "last_tool".to_string());
         assert_eq!(tool_calls[1].function.arguments, "{\"key\": \"value\"}");
+        assert!(extra_content.is_empty());
     }
 
+    #[test]
+    fn test_process_chat_content_chunk_with_extra_content() {
+        let content = vec![
+            ContentBlockChunk::Thought(ThoughtChunk {
+                id: "thought_1".to_string(),
+                text: Some("Let me think...".to_string()),
+                signature: Some("sig123".to_string()),
+                summary_id: None,
+                summary_text: None,
+                provider_type: Some("anthropic".to_string()),
+                extra_data: None,
+            }),
+            ContentBlockChunk::Text(TextChunk {
+                id: "text_1".to_string(),
+                text: "Response text".to_string(),
+            }),
+            ContentBlockChunk::Unknown(UnknownChunk {
+                id: "unknown_1".to_string(),
+                data: serde_json::json!({"custom": "data"}),
+                model_name: Some("test_model".to_string()),
+                provider_name: None,
+            }),
+            ContentBlockChunk::Thought(ThoughtChunk {
+                id: "thought_1".to_string(), // Same ID, new chunk
+                text: Some(" more thinking".to_string()),
+                signature: None,
+                summary_id: None,
+                summary_text: None,
+                provider_type: None,
+                extra_data: None,
+            }),
+            ContentBlockChunk::Thought(ThoughtChunk {
+                id: "thought_2".to_string(), // New thought
+                text: Some("Different thought".to_string()),
+                signature: None,
+                summary_id: None,
+                summary_text: None,
+                provider_type: None,
+                extra_data: None,
+            }),
+        ];
+
+        let mut tool_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
+
+        assert_eq!(
+            content_str,
+            Some("Response text".to_string()),
+            "Text content should be extracted"
+        );
+        assert!(tool_calls.is_empty(), "No tool calls expected");
+        assert_eq!(
+            extra_content.len(),
+            4,
+            "Should have four extra content chunks"
+        );
+
+        // First: Thought chunk
+        match &extra_content[0] {
+            ExtraContentBlockChunk::Thought {
+                insert_index,
+                chunk,
+            } => {
+                assert_eq!(*insert_index, 0, "First thought should have insert_index 0");
+                assert_eq!(chunk.id, "thought_1");
+                assert_eq!(chunk.text, Some("Let me think...".to_string()));
+                assert_eq!(chunk.signature, Some("sig123".to_string()));
+                assert_eq!(chunk.provider_type, Some("anthropic".to_string()));
+            }
+            ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 0"),
} => panic!("Expected Thought at position 0"), + } + + // Second: Unknown chunk + match &extra_content[1] { + ExtraContentBlockChunk::Unknown { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 1, "Unknown should have insert_index 1"); + assert_eq!(chunk.id, "unknown_1"); + assert_eq!(chunk.data, serde_json::json!({"custom": "data"})); + assert_eq!(chunk.model_name, Some("test_model".to_string())); + } + ExtraContentBlockChunk::Thought { .. } => panic!("Expected Unknown at position 1"), + } + + // Third: Second Thought chunk (same ID, same insert_index) + match &extra_content[2] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!( + *insert_index, 0, + "Second chunk of thought_1 should still have insert_index 0" + ); + assert_eq!(chunk.id, "thought_1"); + assert_eq!(chunk.text, Some(" more thinking".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 2"), + } + + // Fourth: New Thought chunk (new ID, new insert_index) + match &extra_content[3] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 2, "New thought_2 should have insert_index 2"); + assert_eq!(chunk.id, "thought_2"); + assert_eq!(chunk.text, Some("Different thought".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 3"), + } + + // Verify the hashmap tracked the IDs correctly + assert_eq!( + extra_id_to_index.get("thought_1"), + Some(&0), + "thought_1 should be mapped to index 0" + ); + assert_eq!( + extra_id_to_index.get("unknown_1"), + Some(&1), + "unknown_1 should be mapped to index 1" + ); + assert_eq!( + extra_id_to_index.get("thought_2"), + Some(&2), + "thought_2 should be mapped to index 2" + ); + } + + #[test] + fn test_extra_content_block_chunk_serialization() { + // Test Thought variant serialization + let thought_chunk = ExtraContentBlockChunk::Thought { + insert_index: 2, + chunk: ThoughtChunk { + id: "chunk_id".to_string(), + text: Some("Thinking...".to_string()), + signature: Some("sig_chunk".to_string()), + summary_id: Some("summary_1".to_string()), + summary_text: Some("Summary text".to_string()), + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }; + let json = serde_json::to_value(&thought_chunk).unwrap(); + + // With tagged enum + flatten, type tag and flattened fields at top level + assert_eq!(json["type"], "thought"); + assert_eq!(json["insert_index"], 2); + assert_eq!(json["id"], "chunk_id"); + assert_eq!(json["text"], "Thinking..."); + assert_eq!(json["signature"], "sig_chunk"); + assert_eq!(json["summary_id"], "summary_1"); + assert_eq!(json["summary_text"], "Summary text"); + assert_eq!(json["provider_type"], "anthropic"); + + // Test Unknown variant serialization + let unknown_chunk = ExtraContentBlockChunk::Unknown { + insert_index: 5, + chunk: UnknownChunk { + id: "unknown_id".to_string(), + data: serde_json::json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }, + }; + let json = serde_json::to_value(&unknown_chunk).unwrap(); + + assert_eq!(json["type"], "unknown"); + assert_eq!(json["insert_index"], 5); + assert_eq!(json["id"], "unknown_id"); + assert_eq!(json["data"], serde_json::json!({"custom": "data"})); + assert_eq!(json["model_name"], "test_model"); } }